Index: include/llvm/CodeGen/AsmPrinter.h =================================================================== --- include/llvm/CodeGen/AsmPrinter.h +++ include/llvm/CodeGen/AsmPrinter.h @@ -121,9 +121,6 @@ using GOTEquivUsePair = std::pair; MapVector GlobalGOTEquivs; - /// Enable print [latency:throughput] in output. - bool EnablePrintSchedInfo = false; - private: MCSymbol *CurrentFnBegin = nullptr; MCSymbol *CurrentFnEnd = nullptr; Index: include/llvm/CodeGen/TargetSubtargetInfo.h =================================================================== --- include/llvm/CodeGen/TargetSubtargetInfo.h +++ include/llvm/CodeGen/TargetSubtargetInfo.h @@ -189,9 +189,6 @@ /// TargetLowering preference). It does not yet disable the postRA scheduler. virtual bool enableMachineScheduler() const; - /// Support printing of [latency:throughput] comment in output .S file. - virtual bool supportPrintSchedInfo() const { return false; } - /// True if the machine scheduler should disable the TLI preference /// for preRA scheduling with the source level scheduler. virtual bool enableMachineSchedDefaultSched() const { return true; } @@ -285,10 +282,6 @@ /// possible. virtual bool enableSubRegLiveness() const { return false; } - /// Returns string representation of scheduler comment - std::string getSchedInfoStr(const MachineInstr &MI) const; - std::string getSchedInfoStr(MCInst const &MCI) const override; - /// This is called after a .mir file was loaded. virtual void mirFileLoaded(MachineFunction &MF) const; }; Index: include/llvm/MC/MCObjectStreamer.h =================================================================== --- include/llvm/MC/MCObjectStreamer.h +++ include/llvm/MC/MCObjectStreamer.h @@ -115,8 +115,7 @@ void EmitSLEB128Value(const MCExpr *Value) override; void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override; void ChangeSection(MCSection *Section, const MCExpr *Subsection) override; - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, - bool = false) override; + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; /// Emit an instruction to a special fragment, because this instruction /// can change its size during relaxation. Index: include/llvm/MC/MCParser/MCAsmParser.h =================================================================== --- include/llvm/MC/MCParser/MCAsmParser.h +++ include/llvm/MC/MCParser/MCAsmParser.h @@ -129,9 +129,6 @@ /// Flag tracking whether any errors have been encountered. bool HadError = false; - /// Enable print [latency:throughput] in output file. - bool EnablePrintSchedInfo = false; - bool ShowParsedOperands = false; public: @@ -165,9 +162,6 @@ bool getShowParsedOperands() const { return ShowParsedOperands; } void setShowParsedOperands(bool Value) { ShowParsedOperands = Value; } - void setEnablePrintSchedInfo(bool Value) { EnablePrintSchedInfo = Value; } - bool shouldPrintSchedInfo() const { return EnablePrintSchedInfo; } - /// Run the parser on the input source buffer. virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false) = 0; Index: include/llvm/MC/MCStreamer.h =================================================================== --- include/llvm/MC/MCStreamer.h +++ include/llvm/MC/MCStreamer.h @@ -952,9 +952,7 @@ virtual void EmitAddrsigSym(const MCSymbol *Sym) {} /// Emit the given \p Instruction into the current section. - /// PrintSchedInfo == true then schedul comment should be added to output - virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, - bool PrintSchedInfo = false); + virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI); /// Set the bundle alignment mode from now on in the section. /// The argument is the power of 2 to which the alignment is set. The Index: include/llvm/MC/MCSubtargetInfo.h =================================================================== --- include/llvm/MC/MCSubtargetInfo.h +++ include/llvm/MC/MCSubtargetInfo.h @@ -180,11 +180,6 @@ auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU); return Found != ProcDesc.end() && StringRef(Found->Key) == CPU; } - - /// Returns string representation of scheduler comment - virtual std::string getSchedInfoStr(MCInst const &MCI) const { - return {}; - } }; } // end namespace llvm Index: lib/CodeGen/AsmPrinter/AsmPrinter.cpp =================================================================== --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -59,7 +59,6 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" @@ -142,10 +141,6 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed"); -static cl::opt - PrintSchedule("print-schedule", cl::Hidden, cl::init(false), - cl::desc("Print 'sched: [latency:throughput]' in .s output")); - char AsmPrinter::ID = 0; using gcp_map_type = DenseMap>; @@ -743,10 +738,7 @@ } /// emitComments - Pretty-print comments for instructions. -/// It returns true iff the sched comment was emitted. -/// Otherwise it returns false. -static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS, - AsmPrinter *AP) { +static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { const MachineFunction *MF = MI.getMF(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -754,7 +746,6 @@ int FI; const MachineFrameInfo &MFI = MF->getFrameInfo(); - bool Commented = false; auto getSize = [&MFI](const SmallVectorImpl &Accesses) { @@ -774,43 +765,24 @@ if (TII->isLoadFromStackSlotPostFE(MI, FI)) { if (MFI.isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); - CommentOS << MMO->getSize() << "-byte Reload"; - Commented = true; + CommentOS << MMO->getSize() << "-byte Reload\n"; } } else if (TII->hasLoadFromStackSlot(MI, Accesses)) { - if (auto Size = getSize(Accesses)) { - CommentOS << Size << "-byte Folded Reload"; - Commented = true; - } + if (auto Size = getSize(Accesses)) + CommentOS << Size << "-byte Folded Reload\n"; } else if (TII->isStoreToStackSlotPostFE(MI, FI)) { if (MFI.isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); - CommentOS << MMO->getSize() << "-byte Spill"; - Commented = true; + CommentOS << MMO->getSize() << "-byte Spill\n"; } } else if (TII->hasStoreToStackSlot(MI, Accesses)) { - if (auto Size = getSize(Accesses)) { - CommentOS << Size << "-byte Folded Spill"; - Commented = true; - } + if (auto Size = getSize(Accesses)) + CommentOS << Size << "-byte Folded Spill\n"; } // Check for spill-induced copies - if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) { - Commented = true; - CommentOS << " Reload Reuse"; - } - - if (Commented) { - if (AP->EnablePrintSchedInfo) { - // If any comment was added above and we need sched info comment then add - // this new comment just after the above comment w/o "\n" between them. - CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n"; - return true; - } - CommentOS << "\n"; - } - return false; + if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) + CommentOS << " Reload Reuse\n"; } /// emitImplicitDef - This method emits the specified machine instruction @@ -1098,10 +1070,8 @@ } } - if (isVerbose() && emitComments(MI, OutStreamer->GetCommentOS(), this)) { - MachineInstr *MIP = const_cast(&MI); - MIP->setAsmPrinterFlag(MachineInstr::NoSchedComment); - } + if (isVerbose()) + emitComments(MI, OutStreamer->GetCommentOS()); switch (MI.getOpcode()) { case TargetOpcode::CFI_INSTRUCTION: @@ -1633,11 +1603,6 @@ } ORE = &getAnalysis().getORE(); - - const TargetSubtargetInfo &STI = MF.getSubtarget(); - EnablePrintSchedInfo = PrintSchedule.getNumOccurrences() - ? PrintSchedule - : STI.supportPrintSchedInfo(); } namespace { Index: lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp =================================================================== --- lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -18,7 +18,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" +//#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -154,7 +154,6 @@ " we don't have an asm parser for this target\n"); Parser->setAssemblerDialect(Dialect); Parser->setTargetParser(*TAP.get()); - Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo); // Enable lexing Masm binary and hex integer literals in intel inline // assembly. if (Dialect == InlineAsm::AD_Intel) Index: lib/CodeGen/MachineCombiner.cpp =================================================================== --- lib/CodeGen/MachineCombiner.cpp +++ lib/CodeGen/MachineCombiner.cpp @@ -558,16 +558,13 @@ continue; LLVM_DEBUG(if (dump_intrs) { - dbgs() << "\tFor the Pattern (" << (int)P << ") these instructions could be removed\n"; - for (auto const *InstrPtr : DelInstrs) { - dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": "; + dbgs() << "\tFor the Pattern (" << (int)P + << ") these instructions could be removed\n"; + for (auto const *InstrPtr : DelInstrs) InstrPtr->print(dbgs(), false, false, false, TII); - } dbgs() << "\tThese instructions could replace the removed ones\n"; - for (auto const *InstrPtr : InsInstrs) { - dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": "; + for (auto const *InstrPtr : InsInstrs) InstrPtr->print(dbgs(), false, false, false, TII); - } }); bool SubstituteAlways = false; Index: lib/CodeGen/TargetSubtargetInfo.cpp =================================================================== --- lib/CodeGen/TargetSubtargetInfo.cpp +++ lib/CodeGen/TargetSubtargetInfo.cpp @@ -11,14 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/ADT/Optional.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -#include using namespace llvm; @@ -66,64 +58,4 @@ return false; } -static std::string createSchedInfoStr(unsigned Latency, double RThroughput) { - static const char *SchedPrefix = " sched: ["; - std::string Comment; - raw_string_ostream CS(Comment); - if (RThroughput != 0.0) - CS << SchedPrefix << Latency << format(":%2.2f", RThroughput) - << "]"; - else - CS << SchedPrefix << Latency << ":?]"; - CS.flush(); - return Comment; -} - -/// Returns string representation of scheduler comment -std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const { - if (MI.isPseudo() || MI.isTerminator()) - return std::string(); - // We don't cache TSchedModel because it depends on TargetInstrInfo - // that could be changed during the compilation - TargetSchedModel TSchedModel; - TSchedModel.init(this); - unsigned Latency = TSchedModel.computeInstrLatency(&MI); - - // Add extra latency due to forwarding delays. - const MCSchedClassDesc &SCDesc = *TSchedModel.resolveSchedClass(&MI); - Latency += - MCSchedModel::getForwardingDelayCycles(getReadAdvanceEntries(SCDesc)); - - double RThroughput = TSchedModel.computeReciprocalThroughput(&MI); - return createSchedInfoStr(Latency, RThroughput); -} - -/// Returns string representation of scheduler comment -std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const { - // We don't cache TSchedModel because it depends on TargetInstrInfo - // that could be changed during the compilation - TargetSchedModel TSchedModel; - TSchedModel.init(this); - unsigned Latency; - if (TSchedModel.hasInstrSchedModel()) { - Latency = TSchedModel.computeInstrLatency(MCI); - // Add extra latency due to forwarding delays. - const MCSchedModel &SM = *TSchedModel.getMCSchedModel(); - unsigned SClassID = getInstrInfo()->get(MCI.getOpcode()).getSchedClass(); - while (SM.getSchedClassDesc(SClassID)->isVariant()) - SClassID = resolveVariantSchedClass(SClassID, &MCI, SM.ProcID); - const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SClassID); - Latency += - MCSchedModel::getForwardingDelayCycles(getReadAdvanceEntries(SCDesc)); - } else if (TSchedModel.hasInstrItineraries()) { - auto *ItinData = TSchedModel.getInstrItineraries(); - Latency = ItinData->getStageLatency( - getInstrInfo()->get(MCI.getOpcode()).getSchedClass()); - } else - return std::string(); - double RThroughput = TSchedModel.computeReciprocalThroughput(MCI); - return createSchedInfoStr(Latency, RThroughput); -} - -void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const { -} +void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const { } Index: lib/MC/MCAsmStreamer.cpp =================================================================== --- lib/MC/MCAsmStreamer.cpp +++ lib/MC/MCAsmStreamer.cpp @@ -107,10 +107,7 @@ void AddComment(const Twine &T, bool EOL = true) override; /// Add a comment showing the encoding of an instruction. - /// If PrintSchedInfo is true, then the comment sched:[x:y] will be added to - /// the output if supported by the target. - void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &, - bool PrintSchedInfo); + void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &); /// Return a raw_ostream that comments can be written to. /// Unlike AddComment, you are required to terminate comments with \n if you @@ -311,8 +308,7 @@ void emitCGProfileEntry(const MCSymbolRefExpr *From, const MCSymbolRefExpr *To, uint64_t Count) override; - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, - bool PrintSchedInfo) override; + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; void EmitBundleAlignMode(unsigned AlignPow2) override; void EmitBundleLock(bool AlignToEnd) override; @@ -1736,8 +1732,7 @@ } void MCAsmStreamer::AddEncodingComment(const MCInst &Inst, - const MCSubtargetInfo &STI, - bool PrintSchedInfo) { + const MCSubtargetInfo &STI) { raw_ostream &OS = GetCommentOS(); SmallString<256> Code; SmallVector Fixups; @@ -1816,11 +1811,7 @@ } } } - OS << "]"; - // If we are not going to add fixup or schedule comments after this point - // then we have to end the current comment line with "\n". - if (Fixups.size() || !PrintSchedInfo) - OS << "\n"; + OS << "]\n"; for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { MCFixup &F = Fixups[i]; @@ -1832,18 +1823,15 @@ } void MCAsmStreamer::EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI, - bool PrintSchedInfo) { + const MCSubtargetInfo &STI) { assert(getCurrentSectionOnly() && "Cannot emit contents before setting section!"); // Show the encoding in a comment if we have a code emitter. - AddEncodingComment(Inst, STI, PrintSchedInfo); + AddEncodingComment(Inst, STI); // Show the MCInst if enabled. if (ShowInst) { - if (PrintSchedInfo) - GetCommentOS() << "\n"; Inst.dump_pretty(GetCommentOS(), InstPrinter.get(), "\n "); GetCommentOS() << "\n"; } @@ -1853,12 +1841,6 @@ else InstPrinter->printInst(&Inst, OS, "", STI); - if (PrintSchedInfo) { - std::string SI = STI.getSchedInfoStr(Inst); - if (!SI.empty()) - GetCommentOS() << SI; - } - StringRef Comments = CommentToEmit; if (Comments.size() && Comments.back() != '\n') GetCommentOS() << "\n"; Index: lib/MC/MCObjectStreamer.cpp =================================================================== --- lib/MC/MCObjectStreamer.cpp +++ lib/MC/MCObjectStreamer.cpp @@ -314,7 +314,7 @@ } void MCObjectStreamer::EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI, bool) { + const MCSubtargetInfo &STI) { getAssembler().getBackend().handleCodePaddingInstructionBegin(Inst); EmitInstructionImpl(Inst, STI); getAssembler().getBackend().handleCodePaddingInstructionEnd(Inst); Index: lib/MC/MCStreamer.cpp =================================================================== --- lib/MC/MCStreamer.cpp +++ lib/MC/MCStreamer.cpp @@ -952,8 +952,7 @@ } } -void MCStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, - bool) { +void MCStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &) { // Scan for values. for (unsigned i = Inst.getNumOperands(); i--;) if (Inst.getOperand(i).isExpr()) Index: lib/Object/RecordStreamer.h =================================================================== --- lib/Object/RecordStreamer.h +++ lib/Object/RecordStreamer.h @@ -46,8 +46,7 @@ public: RecordStreamer(MCContext &Context, const Module &M); - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, - bool) override; + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; Index: lib/Object/RecordStreamer.cpp =================================================================== --- lib/Object/RecordStreamer.cpp +++ lib/Object/RecordStreamer.cpp @@ -82,7 +82,7 @@ RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); } void RecordStreamer::EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI, bool) { + const MCSubtargetInfo &STI) { MCStreamer::EmitInstruction(Inst, STI); } Index: lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp =================================================================== --- lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -102,8 +102,8 @@ /// This function is the one used to emit instruction data into the ELF /// streamer. We override it to add the appropriate mapping symbol if /// necessary. - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, - bool) override { + void EmitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) override { EmitA64MappingSymbol(); MCELFStreamer::EmitInstruction(Inst, STI); } Index: lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp =================================================================== --- lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -484,8 +484,8 @@ /// This function is the one used to emit instruction data into the ELF /// streamer. We override it to add the appropriate mapping symbol if /// necessary. - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, - bool) override { + void EmitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) override { if (IsThumb) EmitThumbMappingSymbol(); else Index: lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h =================================================================== --- lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h +++ lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h @@ -30,8 +30,7 @@ std::unique_ptr Emitter, MCAssembler *Assembler); - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, - bool) override; + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; void EmitSymbol(const MCInst &Inst); void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment, Index: lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp =================================================================== --- lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -59,7 +59,7 @@ MCII(createHexagonMCInstrInfo()) {} void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB, - const MCSubtargetInfo &STI, bool) { + const MCSubtargetInfo &STI) { assert(MCB.getOpcode() == Hexagon::BUNDLE); assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE); assert(HexagonMCInstrInfo::bundleSize(MCB) > 0); Index: lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h =================================================================== --- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -41,8 +41,7 @@ /// \p Inst is actually emitted. For example, we can inspect the operands and /// gather sufficient information that allows us to reason about the register /// usage for the translation unit. - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, - bool = false) override; + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; /// Overriding this function allows us to record all labels that should be /// marked as microMIPS. Based on this data marking is done in Index: lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp =================================================================== --- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -34,7 +34,7 @@ } void MipsELFStreamer::EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI, bool) { + const MCSubtargetInfo &STI) { MCELFStreamer::EmitInstruction(Inst, STI); MCContext &Context = getContext(); Index: lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp =================================================================== --- lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp +++ lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp @@ -143,8 +143,8 @@ public: /// This function is the one used to emit instruction data into the ELF /// streamer. We override it to mask dangerous instructions. - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, - bool) override { + void EmitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) override { // Sandbox indirect jumps. if (isIndirectJump(Inst)) { if (PendingCall) Index: lib/Target/X86/AsmParser/X86AsmInstrumentation.h =================================================================== --- lib/Target/X86/AsmParser/X86AsmInstrumentation.h +++ lib/Target/X86/AsmParser/X86AsmInstrumentation.h @@ -41,8 +41,7 @@ virtual void InstrumentAndEmitInstruction( const MCInst &Inst, SmallVectorImpl> &Operands, - MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out, - bool PrintSchedInfoEnabled); + MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out); protected: friend X86AsmInstrumentation * @@ -54,8 +53,7 @@ unsigned GetFrameRegGeneric(const MCContext &Ctx, MCStreamer &Out); - void EmitInstruction(MCStreamer &Out, const MCInst &Inst, - bool PrintSchedInfoEnabled = false); + void EmitInstruction(MCStreamer &Out, const MCInst &Inst); const MCSubtargetInfo *&STI; Index: lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp =================================================================== --- lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp +++ lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp @@ -194,8 +194,7 @@ // X86AsmInstrumentation implementation: void InstrumentAndEmitInstruction(const MCInst &Inst, OperandVector &Operands, MCContext &Ctx, const MCInstrInfo &MII, - MCStreamer &Out, - /* unused */ bool) override { + MCStreamer &Out) override { InstrumentMOVS(Inst, Operands, Ctx, MII, Out); if (RepPrefix) EmitInstruction(Out, MCInstBuilder(X86::REP_PREFIX)); @@ -1043,13 +1042,12 @@ void X86AsmInstrumentation::InstrumentAndEmitInstruction( const MCInst &Inst, OperandVector &Operands, MCContext &Ctx, - const MCInstrInfo &MII, MCStreamer &Out, bool PrintSchedInfoEnabled) { - EmitInstruction(Out, Inst, PrintSchedInfoEnabled); + const MCInstrInfo &MII, MCStreamer &Out) { + EmitInstruction(Out, Inst); } -void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out, const MCInst &Inst, - bool PrintSchedInfoEnabled) { - Out.EmitInstruction(Inst, *STI, PrintSchedInfoEnabled); +void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out, const MCInst &Inst) { + Out.EmitInstruction(Inst, *STI); } unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx, Index: lib/Target/X86/AsmParser/X86AsmParser.cpp =================================================================== --- lib/Target/X86/AsmParser/X86AsmParser.cpp +++ lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2865,8 +2865,7 @@ void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out) { Instrumentation->InstrumentAndEmitInstruction( - Inst, Operands, getContext(), MII, Out, - getParser().shouldPrintSchedInfo()); + Inst, Operands, getContext(), MII, Out); } bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, Index: lib/Target/X86/InstPrinter/X86InstComments.cpp =================================================================== --- lib/Target/X86/InstPrinter/X86InstComments.cpp +++ lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -1303,6 +1303,7 @@ OS << ']'; --i; // For loop increments element #. } + OS << '\n'; // We successfully added a comment to this instruction. return true; Index: lib/Target/X86/MCTargetDesc/X86BaseInfo.h =================================================================== --- lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -59,9 +59,7 @@ IP_HAS_REPEAT_NE = 4, IP_HAS_REPEAT = 8, IP_HAS_LOCK = 16, - NO_SCHED_INFO = 32, // Don't add sched comment to the current instr because - // it was already added - IP_HAS_NOTRACK = 64 + IP_HAS_NOTRACK = 32 }; } // end namespace X86; Index: lib/Target/X86/X86MCInstLower.cpp =================================================================== --- lib/Target/X86/X86MCInstLower.cpp +++ lib/Target/X86/X86MCInstLower.cpp @@ -100,9 +100,7 @@ } void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { - OutStreamer->EmitInstruction(Inst, getSubtargetInfo(), - EnablePrintSchedInfo && - !(Inst.getFlags() & X86::NO_SCHED_INFO)); + OutStreamer->EmitInstruction(Inst, getSubtargetInfo()); SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get()); } @@ -1860,8 +1858,7 @@ SmallVector Mask; DecodePSHUFBMask(C, Width, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask), - !EnablePrintSchedInfo); + OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); } break; } @@ -1933,8 +1930,7 @@ SmallVector Mask; DecodeVPERMILPMask(C, ElSize, Width, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask), - !EnablePrintSchedInfo); + OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); } break; } @@ -1965,8 +1961,7 @@ SmallVector Mask; DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask), - !EnablePrintSchedInfo); + OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask)); } break; } @@ -1983,8 +1978,7 @@ SmallVector Mask; DecodeVPPERMMask(C, Width, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask), - !EnablePrintSchedInfo); + OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask)); } break; } @@ -2001,7 +1995,7 @@ CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; if (auto *CF = dyn_cast(C)) { CS << "0x" << CF->getValueAPF().bitcastToAPInt().toString(16, false); - OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); + OutStreamer->AddComment(CS.str()); } } break; @@ -2098,7 +2092,7 @@ } } CS << "]"; - OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); + OutStreamer->AddComment(CS.str()); } else if (auto *CV = dyn_cast(C)) { CS << "<"; for (int l = 0; l != NumLanes; ++l) { @@ -2110,7 +2104,7 @@ } } CS << ">"; - OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); + OutStreamer->AddComment(CS.str()); } } break; @@ -2197,14 +2191,12 @@ printConstant(C, CS); } CS << "]"; - OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); + OutStreamer->AddComment(CS.str()); } } MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); - if (MI->getAsmPrinterFlag(MachineInstr::NoSchedComment)) - TmpInst.setFlags(TmpInst.getFlags() | X86::NO_SCHED_INFO); // Stackmap shadows cannot include branch targets, so we can count the bytes // in a call towards the shadow, but must ensure that the no thread returns Index: lib/Target/X86/X86Subtarget.h =================================================================== --- lib/Target/X86/X86Subtarget.h +++ lib/Target/X86/X86Subtarget.h @@ -833,9 +833,6 @@ /// Enable the MachineScheduler pass for all X86 subtargets. bool enableMachineScheduler() const override { return true; } - // TODO: Update the regression tests and return true. - bool supportPrintSchedInfo() const override { return false; } - bool enableEarlyIfConversion() const override; AntiDepBreakMode getAntiDepBreakMode() const override { Index: test/CodeGen/X86/3dnow-schedule.ll =================================================================== --- test/CodeGen/X86/3dnow-schedule.ll +++ test/CodeGen/X86/3dnow-schedule.ll @@ -1,394 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+3dnowa | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC - -define void @test_femms() optsize { -; CHECK-LABEL: test_femms: -; CHECK: # %bb.0: -; CHECK-NEXT: femms # sched: [31:10.33] -; CHECK-NEXT: retq # sched: [1:1.00] - call void @llvm.x86.mmx.femms() - ret void -} -declare void @llvm.x86.mmx.femms() nounwind readnone - -define i64 @test_pavgusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pavgusb: -; CHECK: # %bb.0: -; CHECK-NEXT: pavgusb %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pavgusb (%rdi), %mm0 # sched: [8:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pf2id(x86_mmx* %a0) optsize { -; CHECK-LABEL: test_pf2id: -; CHECK: # %bb.0: -; CHECK-NEXT: pf2id (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: pf2id %mm0, %mm0 # sched: [3:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = load x86_mmx, x86_mmx *%a0, align 8 - %2 = call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %1) - %3 = call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone - -define i64 @test_pf2iw(x86_mmx* %a0) optsize { -; CHECK-LABEL: test_pf2iw: -; CHECK: # %bb.0: -; CHECK-NEXT: pf2iw (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: pf2iw %mm0, %mm0 # sched: [3:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = load x86_mmx, x86_mmx *%a0, align 8 - %2 = call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %1) - %3 = call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone - -define i64 @test_pfacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pfacc: -; CHECK: # %bb.0: -; CHECK-NEXT: pfacc %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfacc (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pfadd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pfadd: -; CHECK: # %bb.0: -; CHECK-NEXT: pfadd %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfadd (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pfcmpeq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pfcmpeq: -; CHECK: # %bb.0: -; CHECK-NEXT: pfcmpeq %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfcmpeq (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pfcmpge(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pfcmpge: -; CHECK: # %bb.0: -; CHECK-NEXT: pfcmpge %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfcmpge (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pfcmpgt(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pfcmpgt: -; CHECK: # %bb.0: -; CHECK-NEXT: pfcmpgt %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfcmpgt (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pfmax(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pfmax: -; CHECK: # %bb.0: -; CHECK-NEXT: pfmax %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfmax (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pfmin(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pfmin: -; CHECK: # %bb.0: -; CHECK-NEXT: pfmin %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfmin (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pfmul(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pfmul: -; CHECK: # %bb.0: -; CHECK-NEXT: pfmul %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfmul (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pfnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pfnacc: -; CHECK: # %bb.0: -; CHECK-NEXT: pfnacc %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfnacc (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pfpnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pfpnacc: -; CHECK: # %bb.0: -; CHECK-NEXT: pfpnacc %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfpnacc (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pfrcp(x86_mmx* %a0) optsize { -; CHECK-LABEL: test_pfrcp: -; CHECK: # %bb.0: -; CHECK-NEXT: pfrcp (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: pfrcp %mm0, %mm0 # sched: [3:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = load x86_mmx, x86_mmx *%a0, align 8 - %2 = call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %1) - %3 = call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone - -define i64 @test_pfrcpit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pfrcpit1: -; CHECK: # %bb.0: -; CHECK-NEXT: pfrcpit1 %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfrcpit1 (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pfrcpit2(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pfrcpit2: -; CHECK: # %bb.0: -; CHECK-NEXT: pfrcpit2 %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfrcpit2 (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pfrsqit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pfrsqit1: -; CHECK: # %bb.0: -; CHECK-NEXT: pfrsqit1 %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfrsqit1 (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pfrsqrt(x86_mmx* %a0) optsize { -; CHECK-LABEL: test_pfrsqrt: -; CHECK: # %bb.0: -; CHECK-NEXT: pfrsqrt (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: pfrsqrt %mm0, %mm0 # sched: [3:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = load x86_mmx, x86_mmx *%a0, align 8 - %2 = call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %1) - %3 = call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone - -define i64 @test_pfsub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pfsub: -; CHECK: # %bb.0: -; CHECK-NEXT: pfsub %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfsub (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pfsubr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pfsubr: -; CHECK: # %bb.0: -; CHECK-NEXT: pfsubr %mm1, %mm0 # sched: [3:1.00] -; CHECK-NEXT: pfsubr (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pi2fd(x86_mmx* %a0) optsize { -; CHECK-LABEL: test_pi2fd: -; CHECK: # %bb.0: -; CHECK-NEXT: pi2fd (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: pi2fd %mm0, %mm0 # sched: [3:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = load x86_mmx, x86_mmx *%a0, align 8 - %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1) - %3 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone - -define i64 @test_pi2fw(x86_mmx* %a0) optsize { -; CHECK-LABEL: test_pi2fw: -; CHECK: # %bb.0: -; CHECK-NEXT: pi2fw (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: pi2fw %mm0, %mm0 # sched: [3:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = load x86_mmx, x86_mmx *%a0, align 8 - %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1) - %3 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone - -define i64 @test_pmulhrw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; CHECK-LABEL: test_pmulhrw: -; CHECK: # %bb.0: -; CHECK-NEXT: pmulhrw %mm1, %mm0 # sched: [5:1.00] -; CHECK-NEXT: pmulhrw (%rdi), %mm0 # sched: [10:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone - -define void @test_prefetch(i8* %a0) optsize { -; CHECK-LABEL: test_prefetch: -; CHECK: # %bb.0: -; CHECK-NEXT: #APP -; CHECK-NEXT: prefetch (%rdi) # sched: [5:0.50] -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: retq # sched: [1:1.00] - tail call void asm sideeffect "prefetch $0", "*m"(i8 *%a0) nounwind - ret void -} - -define void @test_prefetchw(i8* %a0) optsize { -; CHECK-LABEL: test_prefetchw: -; CHECK: # %bb.0: -; CHECK-NEXT: #APP -; CHECK-NEXT: prefetchw (%rdi) # sched: [5:0.50] -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: retq # sched: [1:1.00] - tail call void asm sideeffect "prefetchw $0", "*m"(i8 *%a0) nounwind - ret void -} - -define i64 @test_pswapd(x86_mmx* %a0) optsize { -; CHECK-LABEL: test_pswapd: -; CHECK: # %bb.0: -; CHECK-NEXT: pswapd (%rdi), %mm0 # mm0 = mem[1,0] sched: [6:1.00] -; CHECK-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0] sched: [1:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [1:1.00] - %1 = load x86_mmx, x86_mmx *%a0, align 8 - %2 = call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %1) - %3 = call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone Index: test/CodeGen/X86/adx-schedule.ll =================================================================== --- test/CodeGen/X86/adx-schedule.ll +++ test/CodeGen/X86/adx-schedule.ll @@ -1,114 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+adx | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=SKL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define void @test_adcx(i32 %a0, i32* %a1, i64 %a2, i64* %a3) optsize { -; GENERIC-LABEL: test_adcx: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: adcxl %edi, %edi # sched: [2:0.67] -; GENERIC-NEXT: adcxq %rdx, %rdx # sched: [2:0.67] -; GENERIC-NEXT: adcxl (%rsi), %edi # sched: [7:0.67] -; GENERIC-NEXT: adcxq (%rcx), %rdx # sched: [7:0.67] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BROADWELL-LABEL: test_adcx: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: adcxl %edi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: adcxq %rdx, %rdx # sched: [1:0.50] -; BROADWELL-NEXT: adcxl (%rsi), %edi # sched: [6:0.50] -; BROADWELL-NEXT: adcxq (%rcx), %rdx # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_adcx: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: adcxl %edi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: adcxq %rdx, %rdx # sched: [1:0.50] -; SKYLAKE-NEXT: adcxl (%rsi), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: adcxq (%rcx), %rdx # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_adcx: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: adcxl %edi, %edi # sched: [2:0.50] -; KNL-NEXT: adcxq %rdx, %rdx # sched: [2:0.50] -; KNL-NEXT: adcxl (%rsi), %edi # sched: [7:0.50] -; KNL-NEXT: adcxq (%rcx), %rdx # sched: [7:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_adcx: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: adcxl %edi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: adcxq %rdx, %rdx # sched: [1:0.25] -; ZNVER1-NEXT: adcxl (%rsi), %edi # sched: [5:0.50] -; ZNVER1-NEXT: adcxq (%rcx), %rdx # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "adcx $0, $0 \0A\09 adcx $2, $2 \0A\09 adcx $1, $0 \0A\09 adcx $3, $2", "r,*m,r,*m"(i32 %a0, i32* %a1, i64 %a2, i64* %a3) nounwind - ret void -} -define void @test_adox(i32 %a0, i32* %a1, i64 %a2, i64* %a3) optsize { -; GENERIC-LABEL: test_adox: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: adoxl %edi, %edi # sched: [2:0.67] -; GENERIC-NEXT: adoxq %rdx, %rdx # sched: [2:0.67] -; GENERIC-NEXT: adoxl (%rsi), %edi # sched: [7:0.67] -; GENERIC-NEXT: adoxq (%rcx), %rdx # sched: [7:0.67] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BROADWELL-LABEL: test_adox: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: adoxl %edi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: adoxq %rdx, %rdx # sched: [1:0.50] -; BROADWELL-NEXT: adoxl (%rsi), %edi # sched: [6:0.50] -; BROADWELL-NEXT: adoxq (%rcx), %rdx # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_adox: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: adoxl %edi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: adoxq %rdx, %rdx # sched: [1:0.50] -; SKYLAKE-NEXT: adoxl (%rsi), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: adoxq (%rcx), %rdx # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_adox: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: adoxl %edi, %edi # sched: [2:0.50] -; KNL-NEXT: adoxq %rdx, %rdx # sched: [2:0.50] -; KNL-NEXT: adoxl (%rsi), %edi # sched: [7:0.50] -; KNL-NEXT: adoxq (%rcx), %rdx # sched: [7:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_adox: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: adoxl %edi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: adoxq %rdx, %rdx # sched: [1:0.25] -; ZNVER1-NEXT: adoxl (%rsi), %edi # sched: [5:0.50] -; ZNVER1-NEXT: adoxq (%rcx), %rdx # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "adox $0, $0 \0A\09 adox $2, $2 \0A\09 adox $1, $0 \0A\09 adox $3, $2", "r,*m,r,*m"(i32 %a0, i32* %a1, i64 %a2, i64* %a3) nounwind - ret void -} Index: test/CodeGen/X86/aes-schedule.ll =================================================================== --- test/CodeGen/X86/aes-schedule.ll +++ test/CodeGen/X86/aes-schedule.ll @@ -1,751 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+aes | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=+aes | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont -mattr=+aes | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SKX-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 - -define <2 x i64> @test_aesdec(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_aesdec: -; GENERIC: # %bb.0: -; GENERIC-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: aesdec (%rdi), %xmm0 # sched: [13:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_aesdec: -; SLM: # %bb.0: -; SLM-NEXT: aesdec %xmm1, %xmm0 # sched: [8:5.00] -; SLM-NEXT: aesdec (%rdi), %xmm0 # sched: [8:5.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_aesdec: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [13:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_aesdec: -; SANDY: # %bb.0: -; SANDY-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_aesdec: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [13:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_aesdec: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_aesdec: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [12:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_aesdec: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [12:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_aesdec: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [4:1.00] -; SKYLAKE-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [10:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_aesdec: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_aesdec: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [4:1.00] -; SKX-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [10:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_aesdec: -; SKX: # %bb.0: -; SKX-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; SKX-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_aesdec: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [14:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_aesdec: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [14:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_aesdec: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_aesdec: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_aesdec: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [4:0.50] -; ZNVER1-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [11:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_aesdec: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; ZNVER1-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [11:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <2 x i64>, <2 x i64> *%a2, align 16 - %2 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) - %3 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %2, <2 x i64> %1) - ret <2 x i64> %3 -} -declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_aesdeclast(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_aesdeclast: -; GENERIC: # %bb.0: -; GENERIC-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: aesdeclast (%rdi), %xmm0 # sched: [13:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_aesdeclast: -; SLM: # %bb.0: -; SLM-NEXT: aesdeclast %xmm1, %xmm0 # sched: [8:5.00] -; SLM-NEXT: aesdeclast (%rdi), %xmm0 # sched: [8:5.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_aesdeclast: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [13:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_aesdeclast: -; SANDY: # %bb.0: -; SANDY-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_aesdeclast: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [13:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_aesdeclast: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_aesdeclast: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [12:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_aesdeclast: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [12:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_aesdeclast: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [4:1.00] -; SKYLAKE-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [10:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_aesdeclast: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_aesdeclast: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [4:1.00] -; SKX-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [10:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_aesdeclast: -; SKX: # %bb.0: -; SKX-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; SKX-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_aesdeclast: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [14:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_aesdeclast: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [14:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_aesdeclast: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_aesdeclast: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_aesdeclast: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [4:0.50] -; ZNVER1-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [11:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_aesdeclast: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; ZNVER1-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [11:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <2 x i64>, <2 x i64> *%a2, align 16 - %2 = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) - %3 = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %2, <2 x i64> %1) - ret <2 x i64> %3 -} -declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_aesenc(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_aesenc: -; GENERIC: # %bb.0: -; GENERIC-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: aesenc (%rdi), %xmm0 # sched: [13:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_aesenc: -; SLM: # %bb.0: -; SLM-NEXT: aesenc %xmm1, %xmm0 # sched: [8:5.00] -; SLM-NEXT: aesenc (%rdi), %xmm0 # sched: [8:5.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_aesenc: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [13:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_aesenc: -; SANDY: # %bb.0: -; SANDY-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_aesenc: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [13:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_aesenc: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_aesenc: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [12:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_aesenc: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [12:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_aesenc: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [4:1.00] -; SKYLAKE-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [10:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_aesenc: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_aesenc: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [4:1.00] -; SKX-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [10:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_aesenc: -; SKX: # %bb.0: -; SKX-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; SKX-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_aesenc: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [14:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_aesenc: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [14:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_aesenc: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_aesenc: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_aesenc: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [4:0.50] -; ZNVER1-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [11:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_aesenc: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; ZNVER1-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [11:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <2 x i64>, <2 x i64> *%a2, align 16 - %2 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) - %3 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %2, <2 x i64> %1) - ret <2 x i64> %3 -} -declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_aesenclast(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_aesenclast: -; GENERIC: # %bb.0: -; GENERIC-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: aesenclast (%rdi), %xmm0 # sched: [13:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_aesenclast: -; SLM: # %bb.0: -; SLM-NEXT: aesenclast %xmm1, %xmm0 # sched: [8:5.00] -; SLM-NEXT: aesenclast (%rdi), %xmm0 # sched: [8:5.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_aesenclast: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [13:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_aesenclast: -; SANDY: # %bb.0: -; SANDY-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_aesenclast: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [13:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_aesenclast: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_aesenclast: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [12:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_aesenclast: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [12:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_aesenclast: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [4:1.00] -; SKYLAKE-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [10:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_aesenclast: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_aesenclast: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [4:1.00] -; SKX-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [10:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_aesenclast: -; SKX: # %bb.0: -; SKX-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; SKX-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_aesenclast: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [14:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_aesenclast: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [14:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_aesenclast: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_aesenclast: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_aesenclast: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [4:0.50] -; ZNVER1-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [11:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_aesenclast: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; ZNVER1-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [11:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <2 x i64>, <2 x i64> *%a2, align 16 - %2 = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) - %3 = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %2, <2 x i64> %1) - ret <2 x i64> %3 -} -declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_aesimc(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_aesimc: -; GENERIC: # %bb.0: -; GENERIC-NEXT: aesimc %xmm0, %xmm1 # sched: [12:2.00] -; GENERIC-NEXT: aesimc (%rdi), %xmm0 # sched: [18:2.00] -; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_aesimc: -; SLM: # %bb.0: -; SLM-NEXT: aesimc %xmm0, %xmm1 # sched: [8:5.00] -; SLM-NEXT: aesimc (%rdi), %xmm0 # sched: [8:5.00] -; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_aesimc: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [12:2.00] -; SANDY-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [18:2.00] -; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_aesimc: -; SANDY: # %bb.0: -; SANDY-NEXT: vaesimc %xmm0, %xmm0 # sched: [12:2.00] -; SANDY-NEXT: vaesimc (%rdi), %xmm1 # sched: [18:2.00] -; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_aesimc: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [14:2.00] -; HASWELL-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [20:2.00] -; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_aesimc: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaesimc %xmm0, %xmm0 # sched: [14:2.00] -; HASWELL-NEXT: vaesimc (%rdi), %xmm1 # sched: [20:2.00] -; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_aesimc: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: aesimc (%rdi), %xmm1 # sched: [19:2.00] -; BROADWELL-SSE-NEXT: aesimc %xmm0, %xmm0 # sched: [14:2.00] -; BROADWELL-SSE-NEXT: por %xmm0, %xmm1 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_aesimc: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaesimc (%rdi), %xmm1 # sched: [19:2.00] -; BROADWELL-NEXT: vaesimc %xmm0, %xmm0 # sched: [14:2.00] -; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_aesimc: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [8:2.00] -; SKYLAKE-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [14:2.00] -; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_aesimc: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaesimc %xmm0, %xmm0 # sched: [8:2.00] -; SKYLAKE-NEXT: vaesimc (%rdi), %xmm1 # sched: [14:2.00] -; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_aesimc: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [8:2.00] -; SKX-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [14:2.00] -; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_aesimc: -; SKX: # %bb.0: -; SKX-NEXT: vaesimc %xmm0, %xmm0 # sched: [8:2.00] -; SKX-NEXT: vaesimc (%rdi), %xmm1 # sched: [14:2.00] -; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_aesimc: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_aesimc: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaesimc (%rdi), %xmm1 # sched: [10:1.00] -; BDVER2-NEXT: vaesimc %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_aesimc: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_aesimc: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaesimc (%rdi), %xmm1 # sched: [7:1.00] -; BTVER2-NEXT: vaesimc %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_aesimc: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [4:0.50] -; ZNVER1-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [11:0.50] -; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_aesimc: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaesimc (%rdi), %xmm1 # sched: [11:0.50] -; ZNVER1-NEXT: vaesimc %xmm0, %xmm0 # sched: [4:0.50] -; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <2 x i64>, <2 x i64> *%a1, align 16 - %2 = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) - %3 = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %1) - %4 = or <2 x i64> %2, %3 - ret <2 x i64> %4 -} -declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) - -define <2 x i64> @test_aeskeygenassist(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_aeskeygenassist: -; GENERIC: # %bb.0: -; GENERIC-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [8:3.67] -; GENERIC-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [8:3.33] -; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_aeskeygenassist: -; SLM: # %bb.0: -; SLM-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [8:5.00] -; SLM-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [8:5.00] -; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_aeskeygenassist: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [8:3.67] -; SANDY-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [8:3.33] -; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_aeskeygenassist: -; SANDY: # %bb.0: -; SANDY-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [8:3.67] -; SANDY-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [8:3.33] -; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_aeskeygenassist: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [29:7.00] -; HASWELL-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [34:7.00] -; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_aeskeygenassist: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [29:7.00] -; HASWELL-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [34:7.00] -; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_aeskeygenassist: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [29:7.00] -; BROADWELL-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [33:7.00] -; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_aeskeygenassist: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [29:7.00] -; BROADWELL-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [33:7.00] -; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_aeskeygenassist: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [20:6.00] -; SKYLAKE-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [25:6.00] -; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_aeskeygenassist: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [20:6.00] -; SKYLAKE-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [25:6.00] -; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_aeskeygenassist: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [20:6.00] -; SKX-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [25:6.00] -; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_aeskeygenassist: -; SKX: # %bb.0: -; SKX-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [20:6.00] -; SKX-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [25:6.00] -; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_aeskeygenassist: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_aeskeygenassist: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [10:1.00] -; BDVER2-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_aeskeygenassist: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_aeskeygenassist: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [7:1.00] -; BTVER2-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_aeskeygenassist: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [4:0.50] -; ZNVER1-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [11:0.50] -; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_aeskeygenassist: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [11:0.50] -; ZNVER1-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [4:0.50] -; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <2 x i64>, <2 x i64> *%a1, align 16 - %2 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) - %3 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %1, i8 7) - %4 = or <2 x i64> %2, %3 - ret <2 x i64> %4 -} -declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) Index: test/CodeGen/X86/avx-schedule.ll =================================================================== --- test/CodeGen/X86/avx-schedule.ll +++ test/CodeGen/X86/avx-schedule.ll @@ -1,6120 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_addpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_addpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_addpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_addpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_addpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_addpd: -; SKX: # %bb.0: -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_addpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_addpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_addpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fadd <4 x double> %a0, %a1 - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = fadd <4 x double> %1, %2 - ret <4 x double> %3 -} - -define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_addps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_addps: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_addps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_addps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_addps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_addps: -; SKX: # %bb.0: -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_addps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_addps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_addps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fadd <8 x float> %a0, %a1 - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = fadd <8 x float> %1, %2 - ret <8 x float> %3 -} - -define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_addsubpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_addsubpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_addsubpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_addsubpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_addsubpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_addsubpd: -; SKX: # %bb.0: -; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_addsubpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_addsubpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_addsubpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %1, <4 x double> %2) - ret <4 x double> %3 -} -declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone - -define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_addsubps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_addsubps: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_addsubps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_addsubps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_addsubps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_addsubps: -; SKX: # %bb.0: -; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_addsubps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_addsubps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_addsubps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %1, <8 x float> %2) - ret <8 x float> %3 -} -declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone - -define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_andnotpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_andnotpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_andnotpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_andnotpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_andnotpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_andnotpd: -; SKX: # %bb.0: -; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_andnotpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_andnotpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_andnotpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <4 x double> %a0 to <4 x i64> - %2 = bitcast <4 x double> %a1 to <4 x i64> - %3 = xor <4 x i64> %1, - %4 = and <4 x i64> %3, %2 - %5 = load <4 x double>, <4 x double> *%a2, align 32 - %6 = bitcast <4 x double> %5 to <4 x i64> - %7 = xor <4 x i64> %4, - %8 = and <4 x i64> %6, %7 - %9 = bitcast <4 x i64> %8 to <4 x double> - %10 = fadd <4 x double> %a1, %9 - ret <4 x double> %10 -} - -define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_andnotps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_andnotps: -; SANDY: # %bb.0: -; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_andnotps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_andnotps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_andnotps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_andnotps: -; SKX: # %bb.0: -; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_andnotps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_andnotps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_andnotps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <8 x float> %a0 to <4 x i64> - %2 = bitcast <8 x float> %a1 to <4 x i64> - %3 = xor <4 x i64> %1, - %4 = and <4 x i64> %3, %2 - %5 = load <8 x float>, <8 x float> *%a2, align 32 - %6 = bitcast <8 x float> %5 to <4 x i64> - %7 = xor <4 x i64> %4, - %8 = and <4 x i64> %6, %7 - %9 = bitcast <4 x i64> %8 to <8 x float> - %10 = fadd <8 x float> %a1, %9 - ret <8 x float> %10 -} - -define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_andpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_andpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_andpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_andpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_andpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_andpd: -; SKX: # %bb.0: -; SKX-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_andpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_andpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_andpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <4 x double> %a0 to <4 x i64> - %2 = bitcast <4 x double> %a1 to <4 x i64> - %3 = and <4 x i64> %1, %2 - %4 = load <4 x double>, <4 x double> *%a2, align 32 - %5 = bitcast <4 x double> %4 to <4 x i64> - %6 = and <4 x i64> %3, %5 - %7 = bitcast <4 x i64> %6 to <4 x double> - %8 = fadd <4 x double> %a1, %7 - ret <4 x double> %8 -} - -define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_andps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_andps: -; SANDY: # %bb.0: -; SANDY-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_andps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_andps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_andps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_andps: -; SKX: # %bb.0: -; SKX-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_andps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_andps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_andps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <8 x float> %a0 to <4 x i64> - %2 = bitcast <8 x float> %a1 to <4 x i64> - %3 = and <4 x i64> %1, %2 - %4 = load <8 x float>, <8 x float> *%a2, align 32 - %5 = bitcast <8 x float> %4 to <4 x i64> - %6 = and <4 x i64> %3, %5 - %7 = bitcast <4 x i64> %6 to <8 x float> - %8 = fadd <8 x float> %a1, %7 - ret <8 x float> %8 -} - -define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_blendpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] -; GENERIC-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50] -; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_blendpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] -; SANDY-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50] -; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_blendpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] -; HASWELL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50] -; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_blendpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] -; BROADWELL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [7:0.50] -; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_blendpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] -; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50] -; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_blendpd: -; SKX: # %bb.0: -; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] -; SKX-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_blendpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [2:1.00] -; BDVER2-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [7:1.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_blendpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00] -; BTVER2-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [6:2.00] -; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_blendpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] -; ZNVER1-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = shufflevector <4 x double> %1, <4 x double> %2, <4 x i32> - %4 = fadd <4 x double> %1, %3 - ret <4 x double> %4 -} - -define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_blendps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50] -; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_blendps: -; SANDY: # %bb.0: -; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] -; SANDY-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50] -; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_blendps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] -; HASWELL-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50] -; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_blendps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] -; BROADWELL-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [7:0.50] -; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_blendps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] -; SKYLAKE-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50] -; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_blendps: -; SKX: # %bb.0: -; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] -; SKX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_blendps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [2:1.00] -; BDVER2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [7:1.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_blendps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00] -; BTVER2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [6:2.00] -; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_blendps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] -; ZNVER1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50] -; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = shufflevector <8 x float> %a1, <8 x float> %2, <8 x i32> - %4 = fadd <8 x float> %1, %3 - ret <8 x float> %4 -} - -define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) { -; GENERIC-LABEL: test_blendvpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; GENERIC-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_blendvpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; SANDY-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_blendvpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; HASWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_blendvpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BROADWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_blendvpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] -; SKYLAKE-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_blendvpd: -; SKX: # %bb.0: -; SKX-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] -; SKX-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_blendvpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:3.00] -; BDVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:3.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_blendvpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00] -; BTVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_blendvpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) - %2 = load <4 x double>, <4 x double> *%a3, align 32 - %3 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %1, <4 x double> %2, <4 x double> %a2) - ret <4 x double> %3 -} -declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone - -define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) { -; GENERIC-LABEL: test_blendvps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; GENERIC-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_blendvps: -; SANDY: # %bb.0: -; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; SANDY-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_blendvps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; HASWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_blendvps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BROADWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_blendvps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] -; SKYLAKE-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_blendvps: -; SKX: # %bb.0: -; SKX-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] -; SKX-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_blendvps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:3.00] -; BDVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:3.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_blendvps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00] -; BTVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_blendvps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) - %2 = load <8 x float>, <8 x float> *%a3, align 32 - %3 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %1, <8 x float> %2, <8 x float> %a2) - ret <8 x float> %3 -} -declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone - -define <8 x float> @test_broadcastf128(<4 x float> *%a0) { -; GENERIC-LABEL: test_broadcastf128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_broadcastf128: -; SANDY: # %bb.0: -; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_broadcastf128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_broadcastf128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_broadcastf128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_broadcastf128: -; SKX: # %bb.0: -; SKX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_broadcastf128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_broadcastf128: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_broadcastf128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <4 x float>, <4 x float> *%a0, align 32 - %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> - ret <8 x float> %2 -} - -define <4 x double> @test_broadcastsd_ymm(double *%a0) { -; GENERIC-LABEL: test_broadcastsd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_broadcastsd_ymm: -; SANDY: # %bb.0: -; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_broadcastsd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_broadcastsd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_broadcastsd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_broadcastsd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_broadcastsd_ymm: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_broadcastsd_ymm: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_broadcastsd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load double, double *%a0, align 8 - %2 = insertelement <4 x double> undef, double %1, i32 0 - %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> zeroinitializer - ret <4 x double> %3 -} - -define <4 x float> @test_broadcastss(float *%a0) { -; GENERIC-LABEL: test_broadcastss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_broadcastss: -; SANDY: # %bb.0: -; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_broadcastss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_broadcastss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_broadcastss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_broadcastss: -; SKX: # %bb.0: -; SKX-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_broadcastss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_broadcastss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_broadcastss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load float, float *%a0, align 4 - %2 = insertelement <4 x float> undef, float %1, i32 0 - %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define <8 x float> @test_broadcastss_ymm(float *%a0) { -; GENERIC-LABEL: test_broadcastss_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_broadcastss_ymm: -; SANDY: # %bb.0: -; SANDY-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_broadcastss_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_broadcastss_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_broadcastss_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_broadcastss_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_broadcastss_ymm: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_broadcastss_ymm: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_broadcastss_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load float, float *%a0, align 4 - %2 = insertelement <8 x float> undef, float %1, i32 0 - %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> zeroinitializer - ret <8 x float> %3 -} - -define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_cmppd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; GENERIC-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; GENERIC-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cmppd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cmppd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; HASWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmppd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; BROADWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BROADWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmppd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.50] -; SKYLAKE-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmppd: -; SKX: # %bb.0: -; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.50] -; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cmppd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00] -; BDVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BDVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cmppd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BTVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cmppd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; ZNVER1-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; ZNVER1-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fcmp oeq <4 x double> %a0, %a1 - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = fcmp oeq <4 x double> %a0, %2 - %4 = sext <4 x i1> %1 to <4 x i64> - %5 = sext <4 x i1> %3 to <4 x i64> - %6 = or <4 x i64> %4, %5 - %7 = bitcast <4 x i64> %6 to <4 x double> - ret <4 x double> %7 -} - -define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_cmpps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; GENERIC-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; GENERIC-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cmpps: -; SANDY: # %bb.0: -; SANDY-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cmpps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; HASWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmpps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; BROADWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BROADWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmpps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.50] -; SKYLAKE-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmpps: -; SKX: # %bb.0: -; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.50] -; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cmpps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00] -; BDVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BDVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cmpps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BTVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cmpps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; ZNVER1-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; ZNVER1-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fcmp oeq <8 x float> %a0, %a1 - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = fcmp oeq <8 x float> %a0, %2 - %4 = sext <8 x i1> %1 to <8 x i32> - %5 = sext <8 x i1> %3 to <8 x i32> - %6 = or <8 x i32> %4, %5 - %7 = bitcast <8 x i32> %6 to <8 x float> - ret <8 x float> %7 -} - -define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { -; GENERIC-LABEL: test_cvtdq2pd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00] -; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtdq2pd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00] -; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cvtdq2pd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00] -; HASWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [12:1.00] -; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtdq2pd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [11:1.00] -; BROADWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtdq2pd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] -; SKYLAKE-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00] -; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtdq2pd: -; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cvtdq2pd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:2.00] -; BDVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [8:2.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cvtdq2pd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:2.00] -; BTVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cvtdq2pd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [5:1.00] -; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sitofp <4 x i32> %a0 to <4 x double> - %2 = load <4 x i32>, <4 x i32> *%a1, align 16 - %3 = sitofp <4 x i32> %2 to <4 x double> - %4 = fadd <4 x double> %1, %3 - ret <4 x double> %4 -} - -define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { -; GENERIC-LABEL: test_cvtdq2ps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00] -; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtdq2ps: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] -; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:0.50] -; SANDY-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cvtdq2ps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00] -; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtdq2ps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [9:1.00] -; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtdq2ps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50] -; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtdq2ps: -; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cvtdq2ps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [9:2.00] -; BDVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:2.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cvtdq2ps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:2.00] -; BTVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cvtdq2ps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [5:1.00] -; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sitofp <8 x i32> %a0 to <8 x float> - %2 = load <8 x i32>, <8 x i32> *%a1, align 16 - %3 = sitofp <8 x i32> %2 to <8 x float> - %4 = fadd <8 x float> %1, %3 - ret <8 x float> %4 -} - -define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { -; GENERIC-LABEL: test_cvtpd2dq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00] -; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtpd2dq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00] -; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cvtpd2dq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:1.00] -; HASWELL-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00] -; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtpd2dq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtpd2dq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtpd2dq: -; SKX: # %bb.0: -; SKX-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:0.50] -; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cvtpd2dq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [13:2.00] -; BDVER2-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [8:2.00] -; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cvtpd2dq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:2.00] -; BTVER2-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:2.00] -; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cvtpd2dq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) - %2 = load <4 x double>, <4 x double> *%a1, align 32 - %3 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %2) - %4 = shufflevector <4 x i32> %1, <4 x i32> %3, <8 x i32> - ret <8 x i32> %4 -} -declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone - -define <8 x i32> @test_cvttpd2dq(<4 x double> %a0, <4 x double> *%a1) { -; GENERIC-LABEL: test_cvttpd2dq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00] -; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvttpd2dq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00] -; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cvttpd2dq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00] -; HASWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00] -; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvttpd2dq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvttpd2dq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvttpd2dq: -; SKX: # %bb.0: -; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:0.50] -; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cvttpd2dq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [13:2.00] -; BDVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [8:2.00] -; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cvttpd2dq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:2.00] -; BTVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:2.00] -; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cvttpd2dq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fptosi <4 x double> %a0 to <4 x i32> - %2 = load <4 x double>, <4 x double> *%a1, align 32 - %3 = fptosi <4 x double> %2 to <4 x i32> - %4 = shufflevector <4 x i32> %1, <4 x i32> %3, <8 x i32> - ret <8 x i32> %4 -} - -define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) { -; GENERIC-LABEL: test_cvtpd2ps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00] -; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtpd2ps: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00] -; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cvtpd2ps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00] -; HASWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] -; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtpd2ps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtpd2ps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtpd2ps: -; SKX: # %bb.0: -; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] -; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cvtpd2ps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [13:2.00] -; BDVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [8:2.00] -; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cvtpd2ps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:2.00] -; BTVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:2.00] -; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cvtpd2ps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00] -; ZNVER1-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fptrunc <4 x double> %a0 to <4 x float> - %2 = load <4 x double>, <4 x double> *%a1, align 32 - %3 = fptrunc <4 x double> %2 to <4 x float> - %4 = shufflevector <4 x float> %1, <4 x float> %3, <8 x i32> - ret <8 x float> %4 -} - -define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { -; GENERIC-LABEL: test_cvtps2dq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00] -; GENERIC-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtps2dq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00] -; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cvtps2dq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00] -; HASWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtps2dq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [9:1.00] -; BROADWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtps2dq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50] -; SKYLAKE-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtps2dq: -; SKX: # %bb.0: -; SKX-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50] -; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cvtps2dq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [9:2.00] -; BDVER2-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:2.00] -; BDVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cvtps2dq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [8:2.00] -; BTVER2-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cvtps2dq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [5:1.00] -; ZNVER1-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) - %2 = load <8 x float>, <8 x float> *%a1, align 32 - %3 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %2) - %4 = or <8 x i32> %1, %3 - ret <8 x i32> %4 -} -declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone - -define <8 x i32> @test_cvttps2dq(<8 x float> %a0, <8 x float> *%a1) { -; GENERIC-LABEL: test_cvttps2dq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00] -; GENERIC-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvttps2dq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00] -; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cvttps2dq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00] -; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvttps2dq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [9:1.00] -; BROADWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvttps2dq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50] -; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvttps2dq: -; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:0.50] -; SKX-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50] -; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cvttps2dq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [9:2.00] -; BDVER2-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:2.00] -; BDVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cvttps2dq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [8:2.00] -; BTVER2-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cvttps2dq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [5:1.00] -; ZNVER1-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fptosi <8 x float> %a0 to <8 x i32> - %2 = load <8 x float>, <8 x float> *%a1, align 32 - %3 = fptosi <8 x float> %2 to <8 x i32> - %4 = or <8 x i32> %1, %3 - ret <8 x i32> %4 -} - -define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_divpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:44.00] -; GENERIC-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:44.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_divpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:44.00] -; SANDY-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:44.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_divpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [35:28.00] -; HASWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [42:28.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_divpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [23:16.00] -; BROADWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [29:16.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_divpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [14:5.00] -; SKYLAKE-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [21:8.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_divpd: -; SKX: # %bb.0: -; SKX-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [14:5.00] -; SKX-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [21:8.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_divpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [9:19.00] -; BDVER2-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [14:19.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_divpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [38:38.00] -; BTVER2-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [43:38.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_divpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [15:15.00] -; ZNVER1-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [22:22.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fdiv <4 x double> %a0, %a1 - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = fdiv <4 x double> %1, %2 - ret <4 x double> %3 -} - -define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_divps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:28.00] -; GENERIC-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:28.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_divps: -; SANDY: # %bb.0: -; SANDY-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:28.00] -; SANDY-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:28.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_divps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [21:14.00] -; HASWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [28:14.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_divps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [17:10.00] -; BROADWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [23:10.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_divps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [11:5.00] -; SKYLAKE-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [18:5.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_divps: -; SKX: # %bb.0: -; SKX-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [11:5.00] -; SKX-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [18:5.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_divps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [9:19.00] -; BDVER2-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [14:19.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_divps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [38:38.00] -; BTVER2-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [43:38.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_divps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [12:12.00] -; ZNVER1-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [19:19.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fdiv <8 x float> %a0, %a1 - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = fdiv <8 x float> %1, %2 - ret <8 x float> %3 -} - -define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_dpps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00] -; GENERIC-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [19:2.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_dpps: -; SANDY: # %bb.0: -; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [19:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_dpps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00] -; HASWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [21:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_dpps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00] -; BROADWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_dpps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [13:1.50] -; SKYLAKE-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:1.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_dpps: -; SKX: # %bb.0: -; SKX-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [13:1.33] -; SKX-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:1.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_dpps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [27:3.00] -; BDVER2-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [32:3.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_dpps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:6.00] -; BTVER2-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [17:6.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_dpps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %1, <8 x float> %2, i8 7) - ret <8 x float> %3 -} -declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone - -define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_extractf128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_extractf128: -; SANDY: # %bb.0: -; SANDY-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: vzeroupper # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_extractf128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_extractf128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_extractf128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_extractf128: -; SKX: # %bb.0: -; SKX-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] -; SKX-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_extractf128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [7:0.50] -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_extractf128: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_extractf128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:0.33] -; ZNVER1-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [8:0.50] -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x float> %a0, <8 x float> undef, <4 x i32> - %2 = shufflevector <8 x float> %a1, <8 x float> undef, <4 x i32> - store <4 x float> %2, <4 x float> *%a2 - ret <4 x float> %1 -} - -define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_haddpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; GENERIC-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_haddpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; SANDY-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_haddpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; HASWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_haddpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [11:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_haddpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_haddpd: -; SKX: # %bb.0: -; SKX-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKX-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_haddpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [11:2.00] -; BDVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [16:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_haddpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [4:2.00] -; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_haddpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %1, <4 x double> %2) - ret <4 x double> %3 -} -declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone - -define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_haddps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; GENERIC-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_haddps: -; SANDY: # %bb.0: -; SANDY-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; SANDY-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_haddps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; HASWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_haddps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [11:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_haddps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [13:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_haddps: -; SKX: # %bb.0: -; SKX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKX-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_haddps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [11:2.00] -; BDVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [16:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_haddps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [4:2.00] -; BTVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_haddps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %1, <8 x float> %2) - ret <8 x float> %3 -} -declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone - -define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_hsubpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; GENERIC-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_hsubpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; SANDY-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_hsubpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; HASWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_hsubpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [11:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_hsubpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_hsubpd: -; SKX: # %bb.0: -; SKX-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKX-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_hsubpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [11:2.00] -; BDVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [16:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_hsubpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [4:2.00] -; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_hsubpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %1, <4 x double> %2) - ret <4 x double> %3 -} -declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone - -define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_hsubps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; GENERIC-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_hsubps: -; SANDY: # %bb.0: -; SANDY-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; SANDY-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_hsubps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; HASWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_hsubps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [11:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_hsubps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [13:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_hsubps: -; SKX: # %bb.0: -; SKX-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKX-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_hsubps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [11:2.00] -; BDVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [16:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_hsubps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [4:2.00] -; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_hsubps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %1, <8 x float> %2) - ret <8 x float> %3 -} -declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone - -define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_insertf128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_insertf128: -; SANDY: # %bb.0: -; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] -; SANDY-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_insertf128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; HASWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_insertf128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; BROADWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:0.50] -; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_insertf128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; SKYLAKE-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_insertf128: -; SKX: # %bb.0: -; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; SKX-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_insertf128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.50] -; BDVER2-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_insertf128: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] -; BTVER2-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_insertf128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.67] -; ZNVER1-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [9:0.67] -; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x float> %a1, <4 x float> undef, <8 x i32> - %2 = shufflevector <8 x float> %a0, <8 x float> %1, <8 x i32> - %3 = load <4 x float>, <4 x float> *%a2, align 16 - %4 = shufflevector <4 x float> %3, <4 x float> undef, <8 x i32> - %5 = shufflevector <8 x float> %a0, <8 x float> %4, <8 x i32> - %6 = fadd <8 x float> %2, %5 - ret <8 x float> %6 -} - -define <32 x i8> @test_lddqu(i8* %a0) { -; GENERIC-LABEL: test_lddqu: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_lddqu: -; SANDY: # %bb.0: -; SANDY-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lddqu: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lddqu: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lddqu: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_lddqu: -; SKX: # %bb.0: -; SKX-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lddqu: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vlddqu (%rdi), %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lddqu: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vlddqu (%rdi), %ymm0 # sched: [5:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lddqu: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vlddqu (%rdi), %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) - ret <32 x i8> %1 -} -declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly - -define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) { -; GENERIC-LABEL: test_maskmovpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:1.00] -; GENERIC-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_maskmovpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:1.00] -; SANDY-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_maskmovpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00] -; HASWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; HASWELL-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_maskmovpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:2.00] -; BROADWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; BROADWELL-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_maskmovpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] -; SKYLAKE-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_maskmovpd: -; SKX: # %bb.0: -; SKX-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] -; SKX-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [2:1.00] -; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_maskmovpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [6:1.00] -; BDVER2-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [6:2.00] -; BDVER2-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_maskmovpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [6:1.00] -; BTVER2-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [6:2.00] -; BTVER2-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_maskmovpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:0.50] -; ZNVER1-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [4:0.50] -; ZNVER1-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %a1) - call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) - ret <2 x double> %1 -} -declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly -declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind - -define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2) { -; GENERIC-LABEL: test_maskmovpd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00] -; GENERIC-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_maskmovpd_ymm: -; SANDY: # %bb.0: -; SANDY-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00] -; SANDY-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_maskmovpd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:2.00] -; HASWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; HASWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_maskmovpd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:2.00] -; BROADWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; BROADWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_maskmovpd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] -; SKYLAKE-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_maskmovpd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] -; SKX-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [2:1.00] -; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_maskmovpd_ymm: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [6:2.00] -; BDVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [6:2.00] -; BDVER2-NEXT: vmovapd %ymm2, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_maskmovpd_ymm: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [6:2.00] -; BTVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [6:2.00] -; BTVER2-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_maskmovpd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:1.00] -; ZNVER1-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; ZNVER1-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %a1) - call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %a1, <4 x double> %a2) - ret <4 x double> %1 -} -declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly -declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind - -define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) { -; GENERIC-LABEL: test_maskmovps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:1.00] -; GENERIC-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_maskmovps: -; SANDY: # %bb.0: -; SANDY-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:1.00] -; SANDY-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_maskmovps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00] -; HASWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; HASWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_maskmovps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:2.00] -; BROADWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; BROADWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_maskmovps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:0.50] -; SKYLAKE-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_maskmovps: -; SKX: # %bb.0: -; SKX-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:0.50] -; SKX-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [2:1.00] -; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_maskmovps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [6:1.00] -; BDVER2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [6:2.00] -; BDVER2-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_maskmovps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [6:1.00] -; BTVER2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [6:2.00] -; BTVER2-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_maskmovps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:0.50] -; ZNVER1-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [4:0.50] -; ZNVER1-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %a1) - call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) - ret <4 x float> %1 -} -declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly -declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind - -define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) { -; GENERIC-LABEL: test_maskmovps_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:1.00] -; GENERIC-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_maskmovps_ymm: -; SANDY: # %bb.0: -; SANDY-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:1.00] -; SANDY-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_maskmovps_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:2.00] -; HASWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; HASWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_maskmovps_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:2.00] -; BROADWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; BROADWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_maskmovps_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:0.50] -; SKYLAKE-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_maskmovps_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:0.50] -; SKX-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [2:1.00] -; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_maskmovps_ymm: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [6:2.00] -; BDVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [6:2.00] -; BDVER2-NEXT: vmovaps %ymm2, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_maskmovps_ymm: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [6:2.00] -; BTVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [6:2.00] -; BTVER2-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_maskmovps_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:1.00] -; ZNVER1-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; ZNVER1-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %a1) - call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %a1, <8 x float> %a2) - ret <8 x float> %1 -} -declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly -declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind - -define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_maxpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_maxpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_maxpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_maxpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_maxpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_maxpd: -; SKX: # %bb.0: -; SKX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_maxpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BDVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_maxpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_maxpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %1, <4 x double> %2) - ret <4 x double> %3 -} -declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone - -define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_maxps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_maxps: -; SANDY: # %bb.0: -; SANDY-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_maxps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_maxps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_maxps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_maxps: -; SKX: # %bb.0: -; SKX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_maxps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BDVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_maxps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_maxps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %1, <8 x float> %2) - ret <8 x float> %3 -} -declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone - -define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_minpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_minpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_minpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_minpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_minpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_minpd: -; SKX: # %bb.0: -; SKX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_minpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BDVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_minpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_minpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %1, <4 x double> %2) - ret <4 x double> %3 -} -declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone - -define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_minps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_minps: -; SANDY: # %bb.0: -; SANDY-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_minps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_minps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_minps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_minps: -; SKX: # %bb.0: -; SKX-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_minps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BDVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_minps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_minps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %1, <8 x float> %2) - ret <8 x float> %3 -} -declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone - -define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { -; GENERIC-LABEL: test_movapd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] -; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movapd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] -; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movapd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] -; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movapd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [6:0.50] -; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movapd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movapd: -; SKX: # %bb.0: -; SKX-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movapd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovapd (%rdi), %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movapd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovapd (%rdi), %ymm0 # sched: [5:1.00] -; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movapd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovapd (%rdi), %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <4 x double>, <4 x double> *%a0, align 32 - %2 = fadd <4 x double> %1, %1 - store <4 x double> %2, <4 x double> *%a1, align 32 - ret <4 x double> %2 -} - -define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) { -; GENERIC-LABEL: test_movaps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] -; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movaps: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] -; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movaps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] -; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movaps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [6:0.50] -; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movaps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movaps: -; SKX: # %bb.0: -; SKX-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movaps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovaps (%rdi), %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movaps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps (%rdi), %ymm0 # sched: [5:1.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movaps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovaps (%rdi), %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <8 x float>, <8 x float> *%a0, align 32 - %2 = fadd <8 x float> %1, %1 - store <8 x float> %2, <8 x float> *%a1, align 32 - ret <8 x float> %2 -} - -define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { -; GENERIC-LABEL: test_movddup: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; GENERIC-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] -; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movddup: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; SANDY-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] -; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movddup: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; HASWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] -; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movddup: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; BROADWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:0.50] -; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movddup: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; SKYLAKE-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] -; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movddup: -; SKX: # %bb.0: -; SKX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; SKX-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movddup: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:1.00] -; BDVER2-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [2:1.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movddup: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:2.00] -; BTVER2-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movddup: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [8:0.50] -; ZNVER1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:0.50] -; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> - %2 = load <4 x double>, <4 x double> *%a1, align 32 - %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> - %4 = fadd <4 x double> %1, %3 - ret <4 x double> %4 -} - -define i32 @test_movmskpd(<4 x double> %a0) { -; GENERIC-LABEL: test_movmskpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movmskpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] -; SANDY-NEXT: vzeroupper # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movmskpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movmskpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00] -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movmskpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movmskpd: -; SKX: # %bb.0: -; SKX-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movmskpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [10:1.00] -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movmskpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movmskpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovmskpd %ymm0, %eax # sched: [1:1.00] -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) - ret i32 %1 -} -declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone - -define i32 @test_movmskps(<8 x float> %a0) { -; GENERIC-LABEL: test_movmskps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movmskps: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] -; SANDY-NEXT: vzeroupper # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movmskps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movmskps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movmskps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movmskps: -; SKX: # %bb.0: -; SKX-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movmskps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovmskps %ymm0, %eax # sched: [10:1.00] -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movmskps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movmskps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovmskps %ymm0, %eax # sched: [1:1.00] -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) - ret i32 %1 -} -declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone - -define void @test_movntdq(<4 x i64> %a0, <4 x i64> *%a1) { -; GENERIC-LABEL: test_movntdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movntdq: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: vzeroupper # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movntdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movntdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movntdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movntdq: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movntdq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vmovntdq %ymm0, (%rdi) # sched: [2:2.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movntdq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: vmovntdq %ymm0, (%rdi) # sched: [2:2.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movntdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "vmovntdq $0, $1", "x,*m"(<4 x i64> %a0, <4 x i64> *%a1) - ret void -} - -define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) { -; GENERIC-LABEL: test_movntpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movntpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movntpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movntpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movntpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movntpd: -; SKX: # %bb.0: -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movntpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [3:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movntpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movntpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fadd <4 x double> %a0, %a0 - store <4 x double> %1, <4 x double> *%a1, align 32, !nontemporal !0 - ret <4 x double> %1 -} - -define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) { -; GENERIC-LABEL: test_movntps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movntps: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movntps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movntps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movntps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movntps: -; SKX: # %bb.0: -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movntps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [3:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movntps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movntps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fadd <8 x float> %a0, %a0 - store <8 x float> %1, <8 x float> *%a1, align 32, !nontemporal !0 - ret <8 x float> %1 -} - -define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) { -; GENERIC-LABEL: test_movshdup: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; GENERIC-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] -; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movshdup: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; SANDY-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] -; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movshdup: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; HASWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] -; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movshdup: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; BROADWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:0.50] -; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movshdup: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; SKYLAKE-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] -; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movshdup: -; SKX: # %bb.0: -; SKX-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; SKX-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movshdup: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:1.00] -; BDVER2-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [2:1.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movshdup: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:2.00] -; BTVER2-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movshdup: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [8:0.50] -; ZNVER1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:0.50] -; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> - %2 = load <8 x float>, <8 x float> *%a1, align 32 - %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> - %4 = fadd <8 x float> %1, %3 - ret <8 x float> %4 -} - -define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { -; GENERIC-LABEL: test_movsldup: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; GENERIC-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] -; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movsldup: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; SANDY-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] -; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movsldup: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; HASWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] -; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movsldup: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; BROADWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:0.50] -; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movsldup: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; SKYLAKE-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] -; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movsldup: -; SKX: # %bb.0: -; SKX-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; SKX-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movsldup: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:1.00] -; BDVER2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [2:1.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movsldup: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:2.00] -; BTVER2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movsldup: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [8:0.50] -; ZNVER1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:0.50] -; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> - %2 = load <8 x float>, <8 x float> *%a1, align 32 - %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> - %4 = fadd <8 x float> %1, %3 - ret <8 x float> %4 -} - -define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { -; GENERIC-LABEL: test_movupd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] -; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movupd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00] -; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movupd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] -; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movupd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [6:0.50] -; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movupd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movupd: -; SKX: # %bb.0: -; SKX-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movupd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovupd (%rdi), %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movupd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovupd (%rdi), %ymm0 # sched: [5:1.00] -; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movupd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovupd (%rdi), %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <4 x double>, <4 x double> *%a0, align 1 - %2 = fadd <4 x double> %1, %1 - store <4 x double> %2, <4 x double> *%a1, align 1 - ret <4 x double> %2 -} - -define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) { -; GENERIC-LABEL: test_movups: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] -; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movups: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00] -; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movups: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] -; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movups: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [6:0.50] -; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movups: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movups: -; SKX: # %bb.0: -; SKX-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movups: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovups (%rdi), %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movups: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovups (%rdi), %ymm0 # sched: [5:1.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movups: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovups (%rdi), %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovups %ymm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <8 x float>, <8 x float> *%a0, align 1 - %2 = fadd <8 x float> %1, %1 - store <8 x float> %2, <8 x float> *%a1, align 1 - ret <8 x float> %2 -} - -define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_mulpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_mulpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_mulpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mulpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [3:0.50] -; BROADWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mulpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_mulpd: -; SKX: # %bb.0: -; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_mulpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_mulpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:4.00] -; BTVER2-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:4.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_mulpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; ZNVER1-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fmul <4 x double> %a0, %a1 - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = fmul <4 x double> %1, %2 - ret <4 x double> %3 -} - -define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_mulps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_mulps: -; SANDY: # %bb.0: -; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_mulps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mulps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [3:0.50] -; BROADWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [9:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mulps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_mulps: -; SKX: # %bb.0: -; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_mulps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_mulps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_mulps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; ZNVER1-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fmul <8 x float> %a0, %a1 - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = fmul <8 x float> %1, %2 - ret <8 x float> %3 -} - -define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: orpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: orpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: orpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: orpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: orpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: orpd: -; SKX: # %bb.0: -; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: orpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: orpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: orpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <4 x double> %a0 to <4 x i64> - %2 = bitcast <4 x double> %a1 to <4 x i64> - %3 = or <4 x i64> %1, %2 - %4 = load <4 x double>, <4 x double> *%a2, align 32 - %5 = bitcast <4 x double> %4 to <4 x i64> - %6 = or <4 x i64> %3, %5 - %7 = bitcast <4 x i64> %6 to <4 x double> - %8 = fadd <4 x double> %a1, %7 - ret <4 x double> %8 -} - -define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_orps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_orps: -; SANDY: # %bb.0: -; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_orps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_orps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_orps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_orps: -; SKX: # %bb.0: -; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_orps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_orps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_orps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <8 x float> %a0 to <4 x i64> - %2 = bitcast <8 x float> %a1 to <4 x i64> - %3 = or <4 x i64> %1, %2 - %4 = load <8 x float>, <8 x float> *%a2, align 32 - %5 = bitcast <8 x float> %4 to <4 x i64> - %6 = or <4 x i64> %3, %5 - %7 = bitcast <4 x i64> %6 to <8 x float> - %8 = fadd <8 x float> %a1, %7 - ret <8 x float> %8 -} - -define <4 x double> @test_perm2f128(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_perm2f128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00] -; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_perm2f128: -; SANDY: # %bb.0: -; SANDY-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; SANDY-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00] -; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_perm2f128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; HASWELL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] -; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_perm2f128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; BROADWELL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00] -; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_perm2f128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKYLAKE-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_perm2f128: -; SKX: # %bb.0: -; SKX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_perm2f128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [4:0.50] -; BDVER2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:0.50] -; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_perm2f128: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; BTVER2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00] -; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_perm2f128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [100:0.25] -; ZNVER1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [100:0.25] -; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = shufflevector <4 x double> %a0, <4 x double> %2, <4 x i32> - %4 = fadd <4 x double> %1, %3 - ret <4 x double> %4 -} - -define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_permilpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; GENERIC-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] -; GENERIC-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_permilpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; SANDY-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_permilpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; HASWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_permilpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; BROADWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_permilpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; SKYLAKE-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_permilpd: -; SKX: # %bb.0: -; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_permilpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:0.50] -; BDVER2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [2:0.50] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_permilpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [6:1.00] -; BTVER2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:0.50] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_permilpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [8:0.50] -; ZNVER1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:0.50] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> - %2 = load <2 x double>, <2 x double> *%a1, align 16 - %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> - %4 = fadd <2 x double> %1, %3 - ret <2 x double> %4 -} - -define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) { -; GENERIC-LABEL: test_permilpd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] -; GENERIC-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00] -; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_permilpd_ymm: -; SANDY: # %bb.0: -; SANDY-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] -; SANDY-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00] -; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_permilpd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] -; HASWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00] -; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_permilpd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] -; BROADWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [7:1.00] -; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_permilpd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] -; SKYLAKE-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00] -; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_permilpd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] -; SKX-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_permilpd_ymm: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [7:1.00] -; BDVER2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [2:1.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_permilpd_ymm: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [6:2.00] -; BTVER2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] -; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_permilpd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:0.50] -; ZNVER1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:0.50] -; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> - %2 = load <4 x double>, <4 x double> *%a1, align 32 - %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> - %4 = fadd <4 x double> %1, %3 - ret <4 x double> %4 -} - -define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_permilps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; GENERIC-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] -; GENERIC-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_permilps: -; SANDY: # %bb.0: -; SANDY-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; SANDY-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_permilps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; HASWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_permilps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; BROADWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_permilps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; SKYLAKE-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_permilps: -; SKX: # %bb.0: -; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; SKX-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_permilps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50] -; BDVER2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [2:0.50] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_permilps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] -; BTVER2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:0.50] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_permilps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50] -; ZNVER1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:0.50] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> - %2 = load <4 x float>, <4 x float> *%a1, align 16 - %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} - -define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) { -; GENERIC-LABEL: test_permilps_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; GENERIC-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00] -; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_permilps_ymm: -; SANDY: # %bb.0: -; SANDY-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; SANDY-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00] -; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_permilps_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; HASWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00] -; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_permilps_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; BROADWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [7:1.00] -; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_permilps_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; SKYLAKE-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00] -; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_permilps_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; SKX-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_permilps_ymm: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [7:1.00] -; BDVER2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [2:1.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_permilps_ymm: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [6:2.00] -; BTVER2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_permilps_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:0.50] -; ZNVER1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.50] -; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> - %2 = load <8 x float>, <8 x float> *%a1, align 32 - %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> - %4 = fadd <8 x float> %1, %3 - ret <8 x float> %4 -} - -define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_permilvarpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_permilvarpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_permilvarpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_permilvarpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_permilvarpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_permilvarpd: -; SKX: # %bb.0: -; SKX-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_permilvarpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BDVER2-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_permilvarpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BTVER2-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_permilvarpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %1, <2 x i64> %2) - ret <2 x double> %3 -} -declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone - -define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x i64> *%a2) { -; GENERIC-LABEL: test_permilvarpd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_permilvarpd_ymm: -; SANDY: # %bb.0: -; SANDY-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_permilvarpd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_permilvarpd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_permilvarpd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_permilvarpd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_permilvarpd_ymm: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [3:3.00] -; BDVER2-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:3.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_permilvarpd_ymm: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [4:3.00] -; BTVER2-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [9:3.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_permilvarpd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) - %2 = load <4 x i64>, <4 x i64> *%a2, align 32 - %3 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %1, <4 x i64> %2) - ret <4 x double> %3 -} -declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone - -define <4 x float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_permilvarps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_permilvarps: -; SANDY: # %bb.0: -; SANDY-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_permilvarps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_permilvarps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_permilvarps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_permilvarps: -; SKX: # %bb.0: -; SKX-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_permilvarps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BDVER2-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_permilvarps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BTVER2-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_permilvarps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %1, <4 x i32> %2) - ret <4 x float> %3 -} -declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone - -define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_permilvarps_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_permilvarps_ymm: -; SANDY: # %bb.0: -; SANDY-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_permilvarps_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_permilvarps_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_permilvarps_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_permilvarps_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_permilvarps_ymm: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [3:3.00] -; BDVER2-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:3.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_permilvarps_ymm: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [4:3.00] -; BTVER2-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [9:3.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_permilvarps_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> %2) - ret <8 x float> %3 -} -declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone - -define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) { -; GENERIC-LABEL: test_rcpps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vrcpps (%rdi), %ymm1 # sched: [14:2.00] -; GENERIC-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_rcpps: -; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps (%rdi), %ymm1 # sched: [14:2.00] -; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rcpps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [18:2.00] -; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rcpps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; BROADWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [17:2.00] -; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rcpps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:1.00] -; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rcpps: -; SKX: # %bb.0: -; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:1.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_rcpps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps (%rdi), %ymm1 # sched: [10:2.00] -; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_rcpps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vrcpps (%rdi), %ymm1 # sched: [7:2.00] -; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_rcpps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vrcpps (%rdi), %ymm1 # sched: [12:0.50] -; ZNVER1-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:0.50] -; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) - %2 = load <8 x float>, <8 x float> *%a1, align 32 - %3 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %2) - %4 = fadd <8 x float> %1, %3 - ret <8 x float> %4 -} -declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone - -define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) { -; GENERIC-LABEL: test_roundpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00] -; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_roundpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00] -; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_roundpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [6:0.50] -; HASWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [13:2.00] -; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_roundpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [12:2.00] -; BROADWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [6:0.50] -; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_roundpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:1.00] -; SKYLAKE-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:1.00] -; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_roundpd: -; SKX: # %bb.0: -; SKX-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:1.00] -; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_roundpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [9:2.00] -; BDVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [4:2.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_roundpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:2.00] -; BTVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_roundpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [11:1.00] -; ZNVER1-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [4:1.00] -; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) - %2 = load <4 x double>, <4 x double> *%a1, align 32 - %3 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %2, i32 7) - %4 = fadd <4 x double> %1, %3 - ret <4 x double> %4 -} -declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone - -define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) { -; GENERIC-LABEL: test_roundps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:1.00] -; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_roundps: -; SANDY: # %bb.0: -; SANDY-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:1.00] -; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_roundps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [6:0.50] -; HASWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [13:2.00] -; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_roundps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [12:2.00] -; BROADWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [6:0.50] -; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_roundps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:1.00] -; SKYLAKE-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:1.00] -; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_roundps: -; SKX: # %bb.0: -; SKX-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:1.00] -; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:1.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_roundps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [9:2.00] -; BDVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [4:2.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_roundps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:2.00] -; BTVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_roundps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [11:1.00] -; ZNVER1-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [4:1.00] -; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) - %2 = load <8 x float>, <8 x float> *%a1, align 32 - %3 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %2, i32 7) - %4 = fadd <8 x float> %1, %3 - ret <8 x float> %4 -} -declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone - -define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) { -; GENERIC-LABEL: test_rsqrtps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:2.00] -; GENERIC-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:2.00] -; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_rsqrtps: -; SANDY: # %bb.0: -; SANDY-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:2.00] -; SANDY-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:2.00] -; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rsqrtps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [18:2.00] -; HASWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [11:2.00] -; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rsqrtps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [11:2.00] -; BROADWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [17:2.00] -; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rsqrtps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:1.00] -; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rsqrtps: -; SKX: # %bb.0: -; SKX-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:1.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_rsqrtps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [10:2.00] -; BDVER2-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_rsqrtps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [7:2.00] -; BTVER2-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_rsqrtps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [12:0.50] -; ZNVER1-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [5:1.00] -; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) - %2 = load <8 x float>, <8 x float> *%a1, align 32 - %3 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %2) - %4 = fadd <8 x float> %1, %3 - ret <8 x float> %4 -} -declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone - -define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_shufpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; GENERIC-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] -; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_shufpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; SANDY-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] -; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_shufpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; HASWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] -; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_shufpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; BROADWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [7:1.00] -; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_shufpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] -; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_shufpd: -; SKX: # %bb.0: -; SKX-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; SKX-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_shufpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [2:1.00] -; BDVER2-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [7:1.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_shufpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; BTVER2-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [6:2.00] -; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_shufpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:0.50] -; ZNVER1-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = shufflevector <4 x double> %a1, <4 x double> %2, <4 x i32> - %4 = fadd <4 x double> %1, %3 - ret <4 x double> %4 -} - -define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind { -; GENERIC-LABEL: test_shufps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; GENERIC-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00] -; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_shufps: -; SANDY: # %bb.0: -; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; SANDY-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00] -; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_shufps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; HASWELL-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00] -; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_shufps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; BROADWELL-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [7:1.00] -; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_shufps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; SKYLAKE-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00] -; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_shufps: -; SKX: # %bb.0: -; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; SKX-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_shufps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [2:1.00] -; BDVER2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [7:1.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_shufps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; BTVER2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [6:2.00] -; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_shufps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:0.50] -; ZNVER1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:0.50] -; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = shufflevector <8 x float> %a1, <8 x float> %2, <8 x i32> - %4 = fadd <8 x float> %1, %3 - ret <8 x float> %4 -} - -define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) { -; GENERIC-LABEL: test_sqrtpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:44.00] -; GENERIC-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:44.00] -; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_sqrtpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:44.00] -; SANDY-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:44.00] -; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sqrtpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [42:28.00] -; HASWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [35:28.00] -; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sqrtpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [29:28.00] -; BROADWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [35:28.00] -; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sqrtpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:12.00] -; SKYLAKE-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [25:12.00] -; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sqrtpd: -; SKX: # %bb.0: -; SKX-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:12.00] -; SKX-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [25:12.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_sqrtpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [14:27.00] -; BDVER2-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [9:27.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_sqrtpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [59:54.00] -; BTVER2-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [54:54.00] -; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_sqrtpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [47:40.00] -; ZNVER1-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [40:40.00] -; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) - %2 = load <4 x double>, <4 x double> *%a1, align 32 - %3 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %2) - %4 = fadd <4 x double> %1, %3 - ret <4 x double> %4 -} -declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone - -define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) { -; GENERIC-LABEL: test_sqrtps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:28.00] -; GENERIC-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:28.00] -; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_sqrtps: -; SANDY: # %bb.0: -; SANDY-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:28.00] -; SANDY-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:28.00] -; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sqrtps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [28:14.00] -; HASWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [21:14.00] -; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sqrtps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [21:14.00] -; BROADWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [27:14.00] -; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sqrtps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:6.00] -; SKYLAKE-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:6.00] -; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sqrtps: -; SKX: # %bb.0: -; SKX-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:6.00] -; SKX-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:6.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_sqrtps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vsqrtps (%rdi), %ymm1 # sched: [14:21.00] -; BDVER2-NEXT: vsqrtps %ymm0, %ymm0 # sched: [9:21.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_sqrtps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vsqrtps (%rdi), %ymm1 # sched: [47:42.00] -; BTVER2-NEXT: vsqrtps %ymm0, %ymm0 # sched: [42:42.00] -; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_sqrtps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsqrtps (%rdi), %ymm1 # sched: [35:28.00] -; ZNVER1-NEXT: vsqrtps %ymm0, %ymm0 # sched: [28:28.00] -; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) - %2 = load <8 x float>, <8 x float> *%a1, align 32 - %3 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %2) - %4 = fadd <8 x float> %1, %3 - ret <8 x float> %4 -} -declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone - -define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_subpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_subpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_subpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_subpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_subpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_subpd: -; SKX: # %bb.0: -; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_subpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_subpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_subpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fsub <4 x double> %a0, %a1 - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = fsub <4 x double> %1, %2 - ret <4 x double> %3 -} - -define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_subps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_subps: -; SANDY: # %bb.0: -; SANDY-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_subps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_subps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_subps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_subps: -; SKX: # %bb.0: -; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_subps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_subps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_subps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fsub <8 x float> %a0, %a1 - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = fsub <8 x float> %1, %2 - ret <8 x float> %3 -} - -define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_testpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: xorl %eax, %eax # sched: [0:0.25] -; GENERIC-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: setb %al # sched: [1:0.50] -; GENERIC-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_testpd: -; SANDY: # %bb.0: -; SANDY-NEXT: xorl %eax, %eax # sched: [0:0.25] -; SANDY-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: setb %al # sched: [1:0.50] -; SANDY-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_testpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; HASWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: setb %al # sched: [1:0.50] -; HASWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_testpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; BROADWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: setb %al # sched: [1:0.50] -; BROADWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_testpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: vtestpd %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: setb %al # sched: [1:0.50] -; SKYLAKE-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00] -; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_testpd: -; SKX: # %bb.0: -; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] -; SKX-NEXT: vtestpd %xmm1, %xmm0 # sched: [2:1.00] -; SKX-NEXT: setb %al # sched: [1:0.50] -; SKX-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00] -; SKX-NEXT: adcl $0, %eax # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_testpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25] -; BDVER2-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: setb %al # sched: [1:0.50] -; BDVER2-NEXT: vtestpd (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-NEXT: adcl $0, %eax # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_testpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50] -; BTVER2-NEXT: vtestpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: setb %al # sched: [1:0.50] -; BTVER2-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: adcl $0, %eax # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_testpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25] -; ZNVER1-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: setb %al # sched: [1:0.25] -; ZNVER1-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %2) - %4 = add i32 %1, %3 - ret i32 %4 -} -declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone - -define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_testpd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: xorl %eax, %eax # sched: [0:0.25] -; GENERIC-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: setb %al # sched: [1:0.50] -; GENERIC-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_testpd_ymm: -; SANDY: # %bb.0: -; SANDY-NEXT: xorl %eax, %eax # sched: [0:0.25] -; SANDY-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: setb %al # sched: [1:0.50] -; SANDY-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00] -; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67] -; SANDY-NEXT: vzeroupper # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_testpd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; HASWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: setb %al # sched: [1:0.50] -; HASWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_testpd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; BROADWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: setb %al # sched: [1:0.50] -; BROADWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50] -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_testpd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: vtestpd %ymm1, %ymm0 # sched: [2:1.00] -; SKYLAKE-NEXT: setb %al # sched: [1:0.50] -; SKYLAKE-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:1.00] -; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50] -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_testpd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] -; SKX-NEXT: vtestpd %ymm1, %ymm0 # sched: [2:1.00] -; SKX-NEXT: setb %al # sched: [1:0.50] -; SKX-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:1.00] -; SKX-NEXT: adcl $0, %eax # sched: [1:0.50] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_testpd_ymm: -; BDVER2: # %bb.0: -; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25] -; BDVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: setb %al # sched: [1:0.50] -; BDVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [6:1.00] -; BDVER2-NEXT: adcl $0, %eax # sched: [1:1.00] -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_testpd_ymm: -; BTVER2: # %bb.0: -; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50] -; BTVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [4:2.00] -; BTVER2-NEXT: setb %al # sched: [1:0.50] -; BTVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:2.00] -; BTVER2-NEXT: adcl $0, %eax # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_testpd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25] -; ZNVER1-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: setb %al # sched: [1:0.25] -; ZNVER1-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25] -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %2) - %4 = add i32 %1, %3 - ret i32 %4 -} -declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone - -define i32 @test_testps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_testps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: xorl %eax, %eax # sched: [0:0.25] -; GENERIC-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: setb %al # sched: [1:0.50] -; GENERIC-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_testps: -; SANDY: # %bb.0: -; SANDY-NEXT: xorl %eax, %eax # sched: [0:0.25] -; SANDY-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: setb %al # sched: [1:0.50] -; SANDY-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_testps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; HASWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: setb %al # sched: [1:0.50] -; HASWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_testps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; BROADWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: setb %al # sched: [1:0.50] -; BROADWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_testps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: vtestps %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: setb %al # sched: [1:0.50] -; SKYLAKE-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00] -; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_testps: -; SKX: # %bb.0: -; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] -; SKX-NEXT: vtestps %xmm1, %xmm0 # sched: [2:1.00] -; SKX-NEXT: setb %al # sched: [1:0.50] -; SKX-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00] -; SKX-NEXT: adcl $0, %eax # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_testps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25] -; BDVER2-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: setb %al # sched: [1:0.50] -; BDVER2-NEXT: vtestps (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-NEXT: adcl $0, %eax # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_testps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50] -; BTVER2-NEXT: vtestps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: setb %al # sched: [1:0.50] -; BTVER2-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: adcl $0, %eax # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_testps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25] -; ZNVER1-NEXT: vtestps %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: setb %al # sched: [1:0.25] -; ZNVER1-NEXT: vtestps (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %2) - %4 = add i32 %1, %3 - ret i32 %4 -} -declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone - -define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_testps_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: xorl %eax, %eax # sched: [0:0.25] -; GENERIC-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: setb %al # sched: [1:0.50] -; GENERIC-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_testps_ymm: -; SANDY: # %bb.0: -; SANDY-NEXT: xorl %eax, %eax # sched: [0:0.25] -; SANDY-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: setb %al # sched: [1:0.50] -; SANDY-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00] -; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67] -; SANDY-NEXT: vzeroupper # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_testps_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; HASWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: setb %al # sched: [1:0.50] -; HASWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_testps_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; BROADWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: setb %al # sched: [1:0.50] -; BROADWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50] -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_testps_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: vtestps %ymm1, %ymm0 # sched: [2:1.00] -; SKYLAKE-NEXT: setb %al # sched: [1:0.50] -; SKYLAKE-NEXT: vtestps (%rdi), %ymm0 # sched: [9:1.00] -; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50] -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_testps_ymm: -; SKX: # %bb.0: -; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] -; SKX-NEXT: vtestps %ymm1, %ymm0 # sched: [2:1.00] -; SKX-NEXT: setb %al # sched: [1:0.50] -; SKX-NEXT: vtestps (%rdi), %ymm0 # sched: [9:1.00] -; SKX-NEXT: adcl $0, %eax # sched: [1:0.50] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_testps_ymm: -; BDVER2: # %bb.0: -; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25] -; BDVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: setb %al # sched: [1:0.50] -; BDVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [6:1.00] -; BDVER2-NEXT: adcl $0, %eax # sched: [1:1.00] -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_testps_ymm: -; BTVER2: # %bb.0: -; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50] -; BTVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [4:2.00] -; BTVER2-NEXT: setb %al # sched: [1:0.50] -; BTVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [9:2.00] -; BTVER2-NEXT: adcl $0, %eax # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_testps_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25] -; ZNVER1-NEXT: vtestps %ymm1, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: setb %al # sched: [1:0.25] -; ZNVER1-NEXT: vtestps (%rdi), %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25] -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %2) - %4 = add i32 %1, %3 - ret i32 %4 -} -declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone - -define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_unpckhpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00] -; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_unpckhpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SANDY-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00] -; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_unpckhpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; HASWELL-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00] -; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_unpckhpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; BROADWELL-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [7:1.00] -; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_unpckhpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00] -; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_unpckhpd: -; SKX: # %bb.0: -; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_unpckhpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [2:1.00] -; BDVER2-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [7:1.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_unpckhpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [6:2.00] -; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_unpckhpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.50] -; ZNVER1-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = shufflevector <4 x double> %a1, <4 x double> %2, <4 x i32> - %4 = fadd <4 x double> %1, %3 - ret <4 x double> %4 -} - -define <8 x float> @test_unpckhps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind { -; GENERIC-LABEL: test_unpckhps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_unpckhps: -; SANDY: # %bb.0: -; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_unpckhps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_unpckhps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; BROADWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_unpckhps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKYLAKE-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_unpckhps: -; SKX: # %bb.0: -; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_unpckhps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [2:1.00] -; BDVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_unpckhps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_unpckhps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.50] -; ZNVER1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = shufflevector <8 x float> %1, <8 x float> %2, <8 x i32> - ret <8 x float> %3 -} - -define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_unpcklpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] -; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_unpcklpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SANDY-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] -; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_unpcklpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] -; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_unpcklpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; BROADWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [7:1.00] -; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_unpcklpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKYLAKE-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] -; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_unpcklpd: -; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_unpcklpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [2:1.00] -; BDVER2-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [7:1.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_unpcklpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [6:2.00] -; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_unpcklpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.50] -; ZNVER1-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> - %2 = load <4 x double>, <4 x double> *%a2, align 32 - %3 = shufflevector <4 x double> %a1, <4 x double> %2, <4 x i32> - %4 = fadd <4 x double> %1, %3 - ret <4 x double> %4 -} - -define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind { -; GENERIC-LABEL: test_unpcklps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_unpcklps: -; SANDY: # %bb.0: -; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_unpcklps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_unpcklps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; BROADWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_unpcklps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKYLAKE-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_unpcklps: -; SKX: # %bb.0: -; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_unpcklps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [2:1.00] -; BDVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_unpcklps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_unpcklps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.50] -; ZNVER1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = shufflevector <8 x float> %1, <8 x float> %2, <8 x i32> - ret <8 x float> %3 -} - -define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { -; GENERIC-LABEL: test_xorpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_xorpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_xorpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xorpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xorpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xorpd: -; SKX: # %bb.0: -; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_xorpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_xorpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xorpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <4 x double> %a0 to <4 x i64> - %2 = bitcast <4 x double> %a1 to <4 x i64> - %3 = xor <4 x i64> %1, %2 - %4 = load <4 x double>, <4 x double> *%a2, align 32 - %5 = bitcast <4 x double> %4 to <4 x i64> - %6 = xor <4 x i64> %3, %5 - %7 = bitcast <4 x i64> %6 to <4 x double> - %8 = fadd <4 x double> %a1, %7 - ret <4 x double> %8 -} - -define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_xorps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_xorps: -; SANDY: # %bb.0: -; SANDY-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_xorps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xorps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xorps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xorps: -; SKX: # %bb.0: -; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_xorps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_xorps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BTVER2-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xorps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <8 x float> %a0 to <4 x i64> - %2 = bitcast <8 x float> %a1 to <4 x i64> - %3 = xor <4 x i64> %1, %2 - %4 = load <8 x float>, <8 x float> *%a2, align 32 - %5 = bitcast <8 x float> %4 to <4 x i64> - %6 = xor <4 x i64> %3, %5 - %7 = bitcast <4 x i64> %6 to <8 x float> - %8 = fadd <8 x float> %a1, %7 - ret <8 x float> %8 -} - -define void @test_zeroall() { -; GENERIC-LABEL: test_zeroall: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vzeroall # sched: [9:2.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_zeroall: -; SANDY: # %bb.0: -; SANDY-NEXT: vzeroall # sched: [9:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_zeroall: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vzeroall # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_zeroall: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vzeroall # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_zeroall: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vzeroall # sched: [16:4.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_zeroall: -; SKX: # %bb.0: -; SKX-NEXT: vzeroall # sched: [12:5.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_zeroall: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vzeroall # sched: [90:8.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_zeroall: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vzeroall # sched: [90:36.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_zeroall: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vzeroall # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.avx.vzeroall() - ret void -} -declare void @llvm.x86.avx.vzeroall() nounwind - -define void @test_zeroupper() { -; GENERIC-LABEL: test_zeroupper: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_zeroupper: -; SANDY: # %bb.0: -; SANDY-NEXT: vzeroupper # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_zeroupper: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_zeroupper: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_zeroupper: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_zeroupper: -; SKX: # %bb.0: -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_zeroupper: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_zeroupper: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vzeroupper # sched: [46:18.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_zeroupper: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.avx.vzeroupper() - ret void -} - -define void @test_avx256_zero_idioms() { -; GENERIC-LABEL: test_avx256_zero_idioms: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:1.00] -; GENERIC-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_avx256_zero_idioms: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:1.00] -; SANDY-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:1.00] -; SANDY-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_avx256_zero_idioms: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:1.00] -; HASWELL-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:1.00] -; HASWELL-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_avx256_zero_idioms: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:1.00] -; BROADWELL-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:1.00] -; BROADWELL-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_avx256_zero_idioms: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:0.33] -; SKYLAKE-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:0.33] -; SKYLAKE-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:0.33] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_avx256_zero_idioms: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:0.33] -; SKX-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:0.33] -; SKX-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:0.33] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_avx256_zero_idioms: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [2:1.00] -; BDVER2-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [2:1.00] -; BDVER2-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [2:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_avx256_zero_idioms: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:0.50] -; BTVER2-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] -; BTVER2-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:0.50] -; BTVER2-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_avx256_zero_idioms: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:0.25] -; ZNVER1-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:0.25] -; ZNVER1-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "vxorps %ymm0, %ymm0, %ymm0\0Avxorpd %ymm1, %ymm1, %ymm1\0Avandnps %ymm2, %ymm2, %ymm2\0Avandnpd %ymm3, %ymm3, %ymm3", ""() - ret void -} -declare void @llvm.x86.avx.vzeroupper() nounwind - -!0 = !{i32 1} Index: test/CodeGen/X86/avx2-schedule.ll =================================================================== --- test/CodeGen/X86/avx2-schedule.ll +++ test/CodeGen/X86/avx2-schedule.ll @@ -1,7111 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) { -; GENERIC-LABEL: test_broadcasti128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:1.00] -; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_broadcasti128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50] -; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_broadcasti128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:0.50] -; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_broadcasti128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50] -; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_broadcasti128: -; SKX: # %bb.0: -; SKX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50] -; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_broadcasti128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [8:0.50] -; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <4 x i32>, <4 x i32> *%a1, align 16 - %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> - %3 = add <8 x i32> %2, %a0 - ret <8 x i32> %3 -} - -define <4 x double> @test_broadcastsd_ymm(<2 x double> %a0) { -; GENERIC-LABEL: test_broadcastsd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_broadcastsd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_broadcastsd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_broadcastsd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_broadcastsd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_broadcastsd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer - %2 = fadd <4 x double> %1, %1 - ret <4 x double> %2 -} - -define <4 x float> @test_broadcastss(<4 x float> %a0) { -; GENERIC-LABEL: test_broadcastss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_broadcastss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_broadcastss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_broadcastss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_broadcastss: -; SKX: # %bb.0: -; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_broadcastss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:0.50] -; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer - %2 = fadd <4 x float> %1, %1 - ret <4 x float> %2 -} - -define <8 x float> @test_broadcastss_ymm(<4 x float> %a0) { -; GENERIC-LABEL: test_broadcastss_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_broadcastss_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_broadcastss_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_broadcastss_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_broadcastss_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_broadcastss_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x float> %a0, <4 x float> undef, <8 x i32> zeroinitializer - %2 = fadd <8 x float> %1, %1 - ret <8 x float> %2 -} - -define <4 x i32> @test_extracti128(<8 x i16> %a0, <4 x i32> *%a1) { -; GENERIC-LABEL: test_extracti128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; GENERIC-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_extracti128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; HASWELL-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_extracti128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; BROADWELL-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_extracti128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; SKYLAKE-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_extracti128: -; SKX: # %bb.0: -; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; SKX-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00] -; SKX-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_extracti128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] -; ZNVER1-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [2:0.25] -; ZNVER1-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %z = zext <8 x i16> %a0 to <8 x i32> - %ext = shufflevector <8 x i32> %z, <8 x i32> undef, <4 x i32> - store <4 x i32> %ext, <4 x i32> *%a1 - ret <4 x i32> %ext -} - -define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3) { -; GENERIC-LABEL: test_gatherdpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_gatherdpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_gatherdpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_gatherdpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_gatherdpd: -; SKX: # %bb.0: -; SKX-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_gatherdpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3, i8 2) - ret <2 x double> %1 -} -declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8) nounwind readonly - -define <4 x double> @test_gatherdpd_ymm(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3) { -; GENERIC-LABEL: test_gatherdpd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_gatherdpd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [27:4.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_gatherdpd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [26:5.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_gatherdpd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_gatherdpd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_gatherdpd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3, i8 8) - ret <4 x double> %1 -} -declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8) nounwind readonly - -define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3) { -; GENERIC-LABEL: test_gatherdps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_gatherdps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.67] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_gatherdps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_gatherdps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_gatherdps: -; SKX: # %bb.0: -; SKX-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_gatherdps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3, i8 2) - ret <4 x float> %1 -} -declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8) nounwind readonly - -define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3) { -; GENERIC-LABEL: test_gatherdps_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_gatherdps_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [27:6.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_gatherdps_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [26:4.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_gatherdps_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_gatherdps_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_gatherdps_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3, i8 4) - ret <8 x float> %1 -} -declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) nounwind readonly - -define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3) { -; GENERIC-LABEL: test_gatherqpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_gatherqpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [23:3.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_gatherqpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:3.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_gatherqpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_gatherqpd: -; SKX: # %bb.0: -; SKX-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_gatherqpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3, i8 2) - ret <2 x double> %1 -} -declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8) nounwind readonly - -define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3) { -; GENERIC-LABEL: test_gatherqpd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_gatherqpd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [24:5.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_gatherqpd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [23:3.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_gatherqpd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_gatherqpd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_gatherqpd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3, i8 8) - ret <4 x double> %1 -} -declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8) nounwind readonly - -define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3) { -; GENERIC-LABEL: test_gatherqps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_gatherqps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.67] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_gatherqps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [27:5.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_gatherqps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_gatherqps: -; SKX: # %bb.0: -; SKX-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_gatherqps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3, i8 2) - ret <4 x float> %1 -} -declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8) nounwind readonly - -define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3) { -; GENERIC-LABEL: test_gatherqps_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_gatherqps_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [28:3.67] -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_gatherqps_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [24:5.00] -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_gatherqps_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_gatherqps_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_gatherqps_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3, i8 4) - ret <4 x float> %1 -} -declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8) nounwind readonly - -define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_inserti128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_inserti128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; HASWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_inserti128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; BROADWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_inserti128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; SKYLAKE-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_inserti128: -; SKX: # %bb.0: -; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; SKX-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_inserti128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.25] -; ZNVER1-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50] -; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a1, <4 x i32> undef, <8 x i32> - %2 = shufflevector <8 x i32> %a0, <8 x i32> %1, <8 x i32> - %3 = load <4 x i32>, <4 x i32> *%a2, align 16 - %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <8 x i32> - %5 = shufflevector <8 x i32> %a0, <8 x i32> %4, <8 x i32> - %6 = add <8 x i32> %2, %5 - ret <8 x i32> %6 -} - -define <4 x i64> @test_movntdqa(i8* %a0) { -; GENERIC-LABEL: test_movntdqa: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movntdqa: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movntdqa: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movntdqa: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movntdqa: -; SKX: # %bb.0: -; SKX-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_movntdqa: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0) - ret <4 x i64> %1 -} -declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly - -define <16 x i16> @test_mpsadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_mpsadbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:1.00] -; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [14:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_mpsadbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00] -; HASWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mpsadbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00] -; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [13:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mpsadbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00] -; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_mpsadbw: -; SKX: # %bb.0: -; SKX-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00] -; SKX-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_mpsadbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) - %2 = bitcast <16 x i16> %1 to <32 x i8> - %3 = load <32 x i8>, <32 x i8> *%a2, align 32 - %4 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %2, <32 x i8> %3, i8 7) - ret <16 x i16> %4 -} -declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone - -define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) { -; GENERIC-LABEL: test_pabsb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] -; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pabsb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] -; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pabsb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:0.50] -; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pabsb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] -; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pabsb: -; SKX: # %bb.0: -; SKX-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pabsb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) - %2 = load <32 x i8>, <32 x i8> *%a1, align 32 - %3 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %2) - %4 = or <32 x i8> %1, %3 - ret <32 x i8> %4 -} -declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone - -define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) { -; GENERIC-LABEL: test_pabsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] -; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pabsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] -; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pabsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:0.50] -; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pabsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] -; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pabsd: -; SKX: # %bb.0: -; SKX-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pabsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) - %2 = load <8 x i32>, <8 x i32> *%a1, align 32 - %3 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %2) - %4 = or <8 x i32> %1, %3 - ret <8 x i32> %4 -} -declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone - -define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) { -; GENERIC-LABEL: test_pabsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] -; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pabsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] -; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pabsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:0.50] -; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pabsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] -; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pabsw: -; SKX: # %bb.0: -; SKX-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pabsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) - %2 = load <16 x i16>, <16 x i16> *%a1, align 32 - %3 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %2) - %4 = or <16 x i16> %1, %3 - ret <16 x i16> %4 -} -declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone - -define <16 x i16> @test_packssdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_packssdw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_packssdw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_packssdw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_packssdw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_packssdw: -; SKX: # %bb.0: -; SKX-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_packssdw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) - %2 = bitcast <16 x i16> %1 to <8 x i32> - %3 = load <8 x i32>, <8 x i32> *%a2, align 32 - %4 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %2, <8 x i32> %3) - ret <16 x i16> %4 -} -declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone - -define <32 x i8> @test_packsswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_packsswb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_packsswb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_packsswb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_packsswb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_packsswb: -; SKX: # %bb.0: -; SKX-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_packsswb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) - %2 = bitcast <32 x i8> %1 to <16 x i16> - %3 = load <16 x i16>, <16 x i16> *%a2, align 32 - %4 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %2, <16 x i16> %3) - ret <32 x i8> %4 -} -declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone - -define <16 x i16> @test_packusdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_packusdw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_packusdw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_packusdw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_packusdw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_packusdw: -; SKX: # %bb.0: -; SKX-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_packusdw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) - %2 = bitcast <16 x i16> %1 to <8 x i32> - %3 = load <8 x i32>, <8 x i32> *%a2, align 32 - %4 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %2, <8 x i32> %3) - ret <16 x i16> %4 -} -declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone - -define <32 x i8> @test_packuswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_packuswb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_packuswb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_packuswb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_packuswb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_packuswb: -; SKX: # %bb.0: -; SKX-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_packuswb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) - %2 = bitcast <32 x i8> %1 to <16 x i16> - %3 = load <16 x i16>, <16 x i16> *%a2, align 32 - %4 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %2, <16 x i16> %3) - ret <32 x i8> %4 -} -declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone - -define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_paddb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddb: -; SKX: # %bb.0: -; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_paddb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = add <32 x i8> %a0, %a1 - %2 = load <32 x i8>, <32 x i8> *%a2, align 32 - %3 = add <32 x i8> %1, %2 - ret <32 x i8> %3 -} - -define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_paddd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddd: -; SKX: # %bb.0: -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_paddd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = add <8 x i32> %a0, %a1 - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = add <8 x i32> %1, %2 - ret <8 x i32> %3 -} - -define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { -; GENERIC-LABEL: test_paddq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddq: -; SKX: # %bb.0: -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_paddq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = add <4 x i64> %a0, %a1 - %2 = load <4 x i64>, <4 x i64> *%a2, align 32 - %3 = add <4 x i64> %1, %2 - ret <4 x i64> %3 -} - -define <32 x i8> @test_paddsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_paddsb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddsb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddsb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddsb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddsb: -; SKX: # %bb.0: -; SKX-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_paddsb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1) - %2 = load <32 x i8>, <32 x i8> *%a2, align 32 - %3 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %1, <32 x i8> %2) - ret <32 x i8> %3 -} -declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone - -define <16 x i16> @test_paddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_paddsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddsw: -; SKX: # %bb.0: -; SKX-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_paddsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone - -define <32 x i8> @test_paddusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_paddusb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddusb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddusb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddusb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddusb: -; SKX: # %bb.0: -; SKX-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_paddusb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1) - %2 = load <32 x i8>, <32 x i8> *%a2, align 32 - %3 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %1, <32 x i8> %2) - ret <32 x i8> %3 -} -declare <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone - -define <16 x i16> @test_paddusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_paddusw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddusw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddusw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddusw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddusw: -; SKX: # %bb.0: -; SKX-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_paddusw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone - -define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_paddw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddw: -; SKX: # %bb.0: -; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_paddw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = add <16 x i16> %a0, %a1 - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = add <16 x i16> %1, %2 - ret <16 x i16> %3 -} - -define <32 x i8> @test_palignr(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_palignr: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] -; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00] -; GENERIC-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_palignr: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] -; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00] -; HASWELL-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_palignr: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] -; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00] -; BROADWELL-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_palignr: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] -; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00] -; SKYLAKE-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_palignr: -; SKX: # %bb.0: -; SKX-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] -; SKX-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00] -; SKX-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_palignr: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:0.25] -; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:0.25] -; ZNVER1-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <32 x i8> %a1, <32 x i8> %a0, <32 x i32> - %2 = load <32 x i8>, <32 x i8> *%a2, align 32 - %3 = shufflevector <32 x i8> %a0, <32 x i8> %1, <32 x i32> - %4 = add <32 x i8> %1, %3 - ret <32 x i8> %4 -} - -define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { -; GENERIC-LABEL: test_pand: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pand: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pand: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pand: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pand: -; SKX: # %bb.0: -; SKX-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pand: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = and <4 x i64> %a0, %a1 - %2 = load <4 x i64>, <4 x i64> *%a2, align 32 - %3 = and <4 x i64> %1, %2 - %4 = add <4 x i64> %3, %a1 - ret <4 x i64> %4 -} - -define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { -; GENERIC-LABEL: test_pandn: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] -; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pandn: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] -; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pandn: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [7:0.50] -; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pandn: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pandn: -; SKX: # %bb.0: -; SKX-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pandn: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = xor <4 x i64> %a0, - %2 = and <4 x i64> %a1, %1 - %3 = load <4 x i64>, <4 x i64> *%a2, align 32 - %4 = xor <4 x i64> %2, - %5 = and <4 x i64> %3, %4 - %6 = add <4 x i64> %2, %5 - ret <4 x i64> %6 -} - -define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_pavgb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pavgb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pavgb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pavgb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pavgb: -; SKX: # %bb.0: -; SKX-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pavgb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = zext <32 x i8> %a0 to <32 x i16> - %2 = zext <32 x i8> %a1 to <32 x i16> - %3 = add <32 x i16> %1, %2 - %4 = add <32 x i16> %3, - %5 = lshr <32 x i16> %4, - %6 = trunc <32 x i16> %5 to <32 x i8> - %7 = load <32 x i8>, <32 x i8> *%a2, align 32 - %8 = zext <32 x i8> %6 to <32 x i16> - %9 = zext <32 x i8> %7 to <32 x i16> - %10 = add <32 x i16> %8, %9 - %11 = add <32 x i16> %10, - %12 = lshr <32 x i16> %11, - %13 = trunc <32 x i16> %12 to <32 x i8> - ret <32 x i8> %13 -} - -define <16 x i16> @test_pavgw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_pavgw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pavgw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pavgw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pavgw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pavgw: -; SKX: # %bb.0: -; SKX-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pavgw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = zext <16 x i16> %a0 to <16 x i32> - %2 = zext <16 x i16> %a1 to <16 x i32> - %3 = add <16 x i32> %1, %2 - %4 = add <16 x i32> %3, - %5 = lshr <16 x i32> %4, - %6 = trunc <16 x i32> %5 to <16 x i16> - %7 = load <16 x i16>, <16 x i16> *%a2, align 32 - %8 = zext <16 x i16> %6 to <16 x i32> - %9 = zext <16 x i16> %7 to <16 x i32> - %10 = add <16 x i32> %8, %9 - %11 = add <16 x i32> %10, - %12 = lshr <16 x i32> %11, - %13 = trunc <16 x i32> %12 to <16 x i16> - ret <16 x i16> %13 -} - -define <4 x i32> @test_pblendd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_pblendd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.50] -; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50] -; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pblendd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] -; HASWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pblendd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] -; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [6:0.50] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pblendd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] -; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pblendd: -; SKX: # %bb.0: -; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] -; SKX-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pblendd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.50] -; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [8:1.00] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> - %4 = add <4 x i32> %1, %3 - ret <4 x i32> %4 -} - -define <8 x i32> @test_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_pblendd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50] -; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50] -; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pblendd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] -; HASWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50] -; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pblendd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] -; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [7:0.50] -; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pblendd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] -; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50] -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pblendd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] -; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50] -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pblendd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50] -; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [9:1.50] -; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = shufflevector <8 x i32> %a1, <8 x i32> %2, <8 x i32> - %4 = add <8 x i32> %1, %3 - ret <8 x i32> %4 -} - -define <32 x i8> @test_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2, <32 x i8> *%a3, <32 x i8> %a4) { -; GENERIC-LABEL: test_pblendvb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pblendvb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; HASWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pblendvb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BROADWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pblendvb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] -; SKYLAKE-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pblendvb: -; SKX: # %bb.0: -; SKX-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] -; SKX-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pblendvb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) - %2 = load <32 x i8>, <32 x i8> *%a3, align 32 - %3 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %1, <32 x i8> %2, <32 x i8> %a4) - ret <32 x i8> %3 -} -declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone - -define <16 x i16> @test_pblendw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_pblendw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:0.50] -; GENERIC-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:0.50] -; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pblendw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] -; HASWELL-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00] -; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pblendw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] -; BROADWELL-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [7:1.00] -; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pblendw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] -; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00] -; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pblendw: -; SKX: # %bb.0: -; SKX-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] -; SKX-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00] -; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pblendw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [2:0.33] -; ZNVER1-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [9:0.50] -; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = shufflevector <16 x i16> %a1, <16 x i16> %2, <16 x i32> - %4 = add <16 x i16> %1, %3 - ret <16 x i16> %4 -} - -define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) { -; GENERIC-LABEL: test_pbroadcastb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:0.50] -; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pbroadcastb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00] -; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pbroadcastb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00] -; BROADWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pbroadcastb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00] -; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pbroadcastb: -; SKX: # %bb.0: -; SKX-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] -; SKX-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00] -; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pbroadcastb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [8:1.00] -; ZNVER1-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer - %2 = load <16 x i8>, <16 x i8> *%a1, align 16 - %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer - %4 = add <16 x i8> %1, %3 - ret <16 x i8> %4 -} - -define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) { -; GENERIC-LABEL: test_pbroadcastb_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pbroadcastb_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00] -; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pbroadcastb_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00] -; BROADWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pbroadcastb_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pbroadcastb_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00] -; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pbroadcastb_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:2.00] -; ZNVER1-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [2:0.25] -; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> zeroinitializer - %2 = load <32 x i8>, <32 x i8> *%a1, align 32 - %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> zeroinitializer - %4 = add <32 x i8> %1, %3 - ret <32 x i8> %4 -} - -define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) { -; GENERIC-LABEL: test_pbroadcastd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [7:0.50] -; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pbroadcastd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pbroadcastd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [5:0.50] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pbroadcastd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pbroadcastd: -; SKX: # %bb.0: -; SKX-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pbroadcastd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> zeroinitializer - %2 = load <4 x i32>, <4 x i32> *%a1, align 16 - %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer - %4 = add <4 x i32> %1, %3 - ret <4 x i32> %4 -} - -define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) { -; GENERIC-LABEL: test_pbroadcastd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pbroadcastd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50] -; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pbroadcastd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [6:0.50] -; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pbroadcastd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50] -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pbroadcastd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50] -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pbroadcastd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [2:0.25] -; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer - %2 = load <8 x i32>, <8 x i32> *%a1, align 32 - %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> zeroinitializer - %4 = add <8 x i32> %1, %3 - ret <8 x i32> %4 -} - -define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_pbroadcastq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [7:0.50] -; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pbroadcastq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pbroadcastq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [5:0.50] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pbroadcastq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pbroadcastq: -; SKX: # %bb.0: -; SKX-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pbroadcastq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer - %2 = load <2 x i64>, <2 x i64> *%a1, align 16 - %3 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer - %4 = add <2 x i64> %1, %3 - ret <2 x i64> %4 -} - -define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) { -; GENERIC-LABEL: test_pbroadcastq_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pbroadcastq_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50] -; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pbroadcastq_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [6:0.50] -; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pbroadcastq_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pbroadcastq_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pbroadcastq_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [2:0.25] -; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer - %2 = load <4 x i64>, <4 x i64> *%a1, align 32 - %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> zeroinitializer - %4 = add <4 x i64> %1, %3 - ret <4 x i64> %4 -} - -define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) { -; GENERIC-LABEL: test_pbroadcastw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:0.50] -; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pbroadcastw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00] -; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pbroadcastw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00] -; BROADWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pbroadcastw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00] -; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pbroadcastw: -; SKX: # %bb.0: -; SKX-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] -; SKX-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00] -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pbroadcastw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [8:1.00] -; ZNVER1-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer - %2 = load <8 x i16>, <8 x i16> *%a1, align 16 - %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer - %4 = add <8 x i16> %1, %3 - ret <8 x i16> %4 -} - -define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) { -; GENERIC-LABEL: test_pbroadcastw_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pbroadcastw_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00] -; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pbroadcastw_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00] -; BROADWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pbroadcastw_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pbroadcastw_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00] -; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pbroadcastw_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:2.00] -; ZNVER1-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [2:0.25] -; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> zeroinitializer - %2 = load <16 x i16>, <16 x i16> *%a1, align 32 - %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> zeroinitializer - %4 = add <16 x i16> %1, %3 - ret <16 x i16> %4 -} - -define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_pcmpeqb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pcmpeqb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpeqb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpeqb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpeqb: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pcmpeqb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp eq <32 x i8> %a0, %a1 - %2 = sext <32 x i1> %1 to <32 x i8> - %3 = load <32 x i8>, <32 x i8> *%a2, align 32 - %4 = icmp eq <32 x i8> %2, %3 - %5 = sext <32 x i1> %4 to <32 x i8> - ret <32 x i8> %5 -} - -define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_pcmpeqd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pcmpeqd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpeqd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpeqd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpeqd: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pcmpeqd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp eq <8 x i32> %a0, %a1 - %2 = sext <8 x i1> %1 to <8 x i32> - %3 = load <8 x i32>, <8 x i32> *%a2, align 32 - %4 = icmp eq <8 x i32> %2, %3 - %5 = sext <8 x i1> %4 to <8 x i32> - ret <8 x i32> %5 -} - -define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { -; GENERIC-LABEL: test_pcmpeqq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pcmpeqq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpeqq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpeqq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpeqq: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pcmpeqq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp eq <4 x i64> %a0, %a1 - %2 = sext <4 x i1> %1 to <4 x i64> - %3 = load <4 x i64>, <4 x i64> *%a2, align 32 - %4 = icmp eq <4 x i64> %2, %3 - %5 = sext <4 x i1> %4 to <4 x i64> - ret <4 x i64> %5 -} - -define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_pcmpeqw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pcmpeqw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpeqw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpeqw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpeqw: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pcmpeqw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp eq <16 x i16> %a0, %a1 - %2 = sext <16 x i1> %1 to <16 x i16> - %3 = load <16 x i16>, <16 x i16> *%a2, align 32 - %4 = icmp eq <16 x i16> %2, %3 - %5 = sext <16 x i1> %4 to <16 x i16> - ret <16 x i16> %5 -} - -define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_pcmpgtb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pcmpgtb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpgtb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpgtb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpgtb: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pcmpgtb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp sgt <32 x i8> %a0, %a1 - %2 = sext <32 x i1> %1 to <32 x i8> - %3 = load <32 x i8>, <32 x i8> *%a2, align 32 - %4 = icmp sgt <32 x i8> %2, %3 - %5 = sext <32 x i1> %4 to <32 x i8> - ret <32 x i8> %5 -} - -define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_pcmpgtd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pcmpgtd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpgtd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpgtd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpgtd: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pcmpgtd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp sgt <8 x i32> %a0, %a1 - %2 = sext <8 x i1> %1 to <8 x i32> - %3 = load <8 x i32>, <8 x i32> *%a2, align 32 - %4 = icmp sgt <8 x i32> %2, %3 - %5 = sext <8 x i1> %4 to <8 x i32> - ret <8 x i32> %5 -} - -define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { -; GENERIC-LABEL: test_pcmpgtq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pcmpgtq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpgtq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpgtq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpgtq: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pcmpgtq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp sgt <4 x i64> %a0, %a1 - %2 = sext <4 x i1> %1 to <4 x i64> - %3 = load <4 x i64>, <4 x i64> *%a2, align 32 - %4 = icmp sgt <4 x i64> %2, %3 - %5 = sext <4 x i1> %4 to <4 x i64> - ret <4 x i64> %5 -} - -define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_pcmpgtw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pcmpgtw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpgtw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpgtw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpgtw: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pcmpgtw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp sgt <16 x i16> %a0, %a1 - %2 = sext <16 x i1> %1 to <16 x i16> - %3 = load <16 x i16>, <16 x i16> *%a2, align 32 - %4 = icmp sgt <16 x i16> %2, %3 - %5 = sext <16 x i1> %4 to <16 x i16> - ret <16 x i16> %5 -} - -define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { -; GENERIC-LABEL: test_perm2i128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00] -; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_perm2i128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] -; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_perm2i128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00] -; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_perm2i128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_perm2i128: -; SKX: # %bb.0: -; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_perm2i128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [2:0.25] -; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:0.50] -; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> - %2 = load <4 x i64>, <4 x i64> *%a2, align 32 - %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> - %4 = add <4 x i64> %1, %3 - ret <4 x i64> %4 -} - -define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_permd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_permd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; HASWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_permd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; BROADWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_permd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SKYLAKE-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_permd: -; SKX: # %bb.0: -; SKX-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_permd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [2:0.25] -; ZNVER1-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:0.50] -; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0) - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %2, <8 x i32> %a0) - %4 = add <8 x i32> %1, %3 - ret <8 x i32> %4 -} -declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly - -define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) { -; GENERIC-LABEL: test_permpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00] -; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_permpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] -; HASWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] -; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_permpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] -; BROADWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00] -; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_permpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] -; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] -; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_permpd: -; SKX: # %bb.0: -; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_permpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [107:0.50] -; ZNVER1-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [100:0.25] -; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> - %2 = load <4 x double>, <4 x double> *%a1, align 32 - %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> - %4 = fadd <4 x double> %1, %3 - ret <4 x double> %4 -} - -define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2) { -; GENERIC-LABEL: test_permps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_permps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; HASWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_permps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; BROADWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_permps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SKYLAKE-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_permps: -; SKX: # %bb.0: -; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_permps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [100:0.25] -; ZNVER1-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [107:0.50] -; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x i32> %a0) - %2 = load <8 x float>, <8 x float> *%a2, align 32 - %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> %a0) - %4 = fadd <8 x float> %1, %3 - ret <8 x float> %4 -} -declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly - -define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) { -; GENERIC-LABEL: test_permq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00] -; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00] -; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_permq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] -; HASWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] -; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_permq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] -; BROADWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00] -; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_permq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] -; SKYLAKE-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_permq: -; SKX: # %bb.0: -; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_permq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:0.50] -; ZNVER1-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [2:0.25] -; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> - %2 = load <4 x i64>, <4 x i64> *%a1, align 32 - %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> - %4 = add <4 x i64> %1, %3 - ret <4 x i64> %4 -} - -define <4 x i32> @test_pgatherdd(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3) { -; GENERIC-LABEL: test_pgatherdd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pgatherdd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pgatherdd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pgatherdd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pgatherdd: -; SKX: # %bb.0: -; SKX-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pgatherdd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3, i8 2) - ret <4 x i32> %1 -} -declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8) nounwind readonly - -define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3) { -; GENERIC-LABEL: test_pgatherdd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pgatherdd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [27:6.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pgatherdd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pgatherdd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pgatherdd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pgatherdd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3, i8 2) - ret <8 x i32> %1 -} -declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8) nounwind readonly - -define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3) { -; GENERIC-LABEL: test_pgatherdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pgatherdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pgatherdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pgatherdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pgatherdq: -; SKX: # %bb.0: -; SKX-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pgatherdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3, i8 2) - ret <2 x i64> %1 -} -declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8) nounwind readonly - -define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3) { -; GENERIC-LABEL: test_pgatherdq_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pgatherdq_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [27:4.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pgatherdq_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pgatherdq_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pgatherdq_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pgatherdq_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3, i8 2) - ret <4 x i64> %1 -} -declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8) nounwind readonly - -define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3) { -; GENERIC-LABEL: test_pgatherqd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pgatherqd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:5.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pgatherqd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pgatherqd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pgatherqd: -; SKX: # %bb.0: -; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pgatherqd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3, i8 2) - ret <4 x i32> %1 -} -declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8) nounwind readonly - -define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3) { -; GENERIC-LABEL: test_pgatherqd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pgatherqd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [28:5.00] -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pgatherqd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pgatherqd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pgatherqd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pgatherqd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3, i8 2) - ret <4 x i32> %1 -} -declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8) nounwind readonly - -define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64> %a3) { -; GENERIC-LABEL: test_pgatherqq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pgatherqq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [23:3.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pgatherqq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pgatherqq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pgatherqq: -; SKX: # %bb.0: -; SKX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pgatherqq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %a1, <2 x i64> %a2, <2 x i64> %a3, i8 2) - ret <2 x i64> %1 -} -declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8) nounwind readonly - -define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x i64> %a3) { -; GENERIC-LABEL: test_pgatherqq_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pgatherqq_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [24:5.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pgatherqq_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pgatherqq_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pgatherqq_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pgatherqq_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %a1, <4 x i64> %a2, <4 x i64> %a3, i8 2) - ret <4 x i64> %1 -} -declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8) nounwind readonly - -define <8 x i32> @test_phaddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_phaddd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.50] -; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_phaddd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; HASWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phaddd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phaddd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phaddd: -; SKX: # %bb.0: -; SKX-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_phaddd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %1, <8 x i32> %2) - ret <8 x i32> %3 -} -declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone - -define <16 x i16> @test_phaddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_phaddsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.50] -; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_phaddsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; HASWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phaddsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phaddsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phaddsw: -; SKX: # %bb.0: -; SKX-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_phaddsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone - -define <16 x i16> @test_phaddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_phaddw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.50] -; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_phaddw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; HASWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phaddw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phaddw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phaddw: -; SKX: # %bb.0: -; SKX-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_phaddw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone - -define <8 x i32> @test_phsubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_phsubd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.50] -; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_phsubd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; HASWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phsubd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phsubd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phsubd: -; SKX: # %bb.0: -; SKX-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_phsubd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %1, <8 x i32> %2) - ret <8 x i32> %3 -} -declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone - -define <16 x i16> @test_phsubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_phsubsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.50] -; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_phsubsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; HASWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phsubsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phsubsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phsubsw: -; SKX: # %bb.0: -; SKX-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_phsubsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone - -define <16 x i16> @test_phsubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_phsubw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.50] -; GENERIC-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_phsubw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; HASWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phsubw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phsubw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phsubw: -; SKX: # %bb.0: -; SKX-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_phsubw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone - -define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_pmaddubsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaddubsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaddubsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaddubsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaddubsw: -; SKX: # %bb.0: -; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmaddubsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) - %2 = bitcast <16 x i16> %1 to <32 x i8> - %3 = load <32 x i8>, <32 x i8> *%a2, align 32 - %4 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %2, <32 x i8> %3) - ret <16 x i16> %4 -} -declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone - -define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_pmaddwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaddwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaddwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaddwd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaddwd: -; SKX: # %bb.0: -; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmaddwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) - %2 = bitcast <8 x i32> %1 to <16 x i16> - %3 = load <16 x i16>, <16 x i16> *%a2, align 32 - %4 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %2, <16 x i16> %3) - ret <8 x i32> %4 -} -declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone - -define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) { -; GENERIC-LABEL: test_pmaskmovd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:1.00] -; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaskmovd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:2.00] -; HASWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaskmovd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:2.00] -; BROADWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaskmovd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] -; SKYLAKE-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaskmovd: -; SKX: # %bb.0: -; SKX-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] -; SKX-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00] -; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmaskmovd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [100:0.25] -; ZNVER1-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1) - call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) - ret <4 x i32> %1 -} -declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly -declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind - -define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) { -; GENERIC-LABEL: test_pmaskmovd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:1.00] -; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaskmovd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:2.00] -; HASWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaskmovd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:2.00] -; BROADWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaskmovd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] -; SKYLAKE-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaskmovd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] -; SKX-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00] -; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmaskmovd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [100:0.25] -; ZNVER1-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1) - call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) - ret <8 x i32> %1 -} -declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly -declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind - -define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) { -; GENERIC-LABEL: test_pmaskmovq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00] -; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaskmovq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:2.00] -; HASWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaskmovq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:2.00] -; BROADWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaskmovq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50] -; SKYLAKE-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaskmovq: -; SKX: # %bb.0: -; SKX-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50] -; SKX-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00] -; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmaskmovq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00] -; ZNVER1-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) - call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) - ret <2 x i64> %1 -} -declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly -declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind - -define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) { -; GENERIC-LABEL: test_pmaskmovq_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.00] -; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaskmovq_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:2.00] -; HASWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaskmovq_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:2.00] -; BROADWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaskmovq_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50] -; SKYLAKE-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaskmovq_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50] -; SKX-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00] -; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmaskmovq_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.50] -; ZNVER1-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1) - call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) - ret <4 x i64> %1 -} -declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly -declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind - -define <32 x i8> @test_pmaxsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_pmaxsb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaxsb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxsb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxsb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxsb: -; SKX: # %bb.0: -; SKX-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmaxsb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1) - %2 = load <32 x i8>, <32 x i8> *%a2, align 32 - %3 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %1, <32 x i8> %2) - ret <32 x i8> %3 -} -declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone - -define <8 x i32> @test_pmaxsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_pmaxsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaxsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxsd: -; SKX: # %bb.0: -; SKX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmaxsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1) - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %1, <8 x i32> %2) - ret <8 x i32> %3 -} -declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone - -define <16 x i16> @test_pmaxsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_pmaxsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaxsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxsw: -; SKX: # %bb.0: -; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmaxsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone - -define <32 x i8> @test_pmaxub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_pmaxub: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaxub: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxub: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxub: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxub: -; SKX: # %bb.0: -; SKX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmaxub: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1) - %2 = load <32 x i8>, <32 x i8> *%a2, align 32 - %3 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %1, <32 x i8> %2) - ret <32 x i8> %3 -} -declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone - -define <8 x i32> @test_pmaxud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_pmaxud: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaxud: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxud: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxud: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxud: -; SKX: # %bb.0: -; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmaxud: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1) - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %1, <8 x i32> %2) - ret <8 x i32> %3 -} -declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone - -define <16 x i16> @test_pmaxuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_pmaxuw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaxuw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxuw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxuw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxuw: -; SKX: # %bb.0: -; SKX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmaxuw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone - -define <32 x i8> @test_pminsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_pminsb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pminsb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminsb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminsb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminsb: -; SKX: # %bb.0: -; SKX-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pminsb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1) - %2 = load <32 x i8>, <32 x i8> *%a2, align 32 - %3 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %1, <32 x i8> %2) - ret <32 x i8> %3 -} -declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone - -define <8 x i32> @test_pminsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_pminsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pminsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminsd: -; SKX: # %bb.0: -; SKX-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pminsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1) - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %1, <8 x i32> %2) - ret <8 x i32> %3 -} -declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone - -define <16 x i16> @test_pminsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_pminsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pminsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminsw: -; SKX: # %bb.0: -; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pminsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone - -define <32 x i8> @test_pminub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_pminub: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pminub: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminub: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminub: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminub: -; SKX: # %bb.0: -; SKX-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pminub: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1) - %2 = load <32 x i8>, <32 x i8> *%a2, align 32 - %3 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %1, <32 x i8> %2) - ret <32 x i8> %3 -} -declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone - -define <8 x i32> @test_pminud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_pminud: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pminud: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminud: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminud: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminud: -; SKX: # %bb.0: -; SKX-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pminud: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1) - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %1, <8 x i32> %2) - ret <8 x i32> %3 -} -declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone - -define <16 x i16> @test_pminuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_pminuw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pminuw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminuw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminuw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminuw: -; SKX: # %bb.0: -; SKX-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pminuw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone - -define i32 @test_pmovmskb(<32 x i8> %a0) { -; GENERIC-LABEL: test_pmovmskb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmovmskb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovmskb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00] -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovmskb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovmskb: -; SKX: # %bb.0: -; SKX-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmovmskb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovmskb %ymm0, %eax # sched: [2:2.00] -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) - ret i32 %1 -} -declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone - -define <8 x i32> @test_pmovsxbd(<16 x i8> %a0, <16 x i8> *%a1) { -; GENERIC-LABEL: test_pmovsxbd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] -; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmovsxbd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] -; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovsxbd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] -; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovsxbd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovsxbd: -; SKX: # %bb.0: -; SKX-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmovsxbd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> - %2 = sext <8 x i8> %1 to <8 x i32> - %3 = load <16 x i8>, <16 x i8> *%a1, align 16 - %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> - %5 = sext <8 x i8> %4 to <8 x i32> - %6 = add <8 x i32> %2, %5 - ret <8 x i32> %6 -} - -define <4 x i64> @test_pmovsxbq(<16 x i8> %a0, <16 x i8> *%a1) { -; GENERIC-LABEL: test_pmovsxbq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] -; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmovsxbq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] -; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovsxbq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] -; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovsxbq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovsxbq: -; SKX: # %bb.0: -; SKX-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmovsxbq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> - %2 = sext <4 x i8> %1 to <4 x i64> - %3 = load <16 x i8>, <16 x i8> *%a1, align 16 - %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> - %5 = sext <4 x i8> %4 to <4 x i64> - %6 = add <4 x i64> %2, %5 - ret <4 x i64> %6 -} - -define <16 x i16> @test_pmovsxbw(<16 x i8> %a0, <16 x i8> *%a1) { -; GENERIC-LABEL: test_pmovsxbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00] -; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmovsxbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00] -; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovsxbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00] -; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovsxbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00] -; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovsxbw: -; SKX: # %bb.0: -; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00] -; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmovsxbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sext <16 x i8> %a0 to <16 x i16> - %2 = load <16 x i8>, <16 x i8> *%a1, align 16 - %3 = sext <16 x i8> %2 to <16 x i16> - %4 = add <16 x i16> %1, %3 - ret <16 x i16> %4 -} - -define <4 x i64> @test_pmovsxdq(<4 x i32> %a0, <4 x i32> *%a1) { -; GENERIC-LABEL: test_pmovsxdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00] -; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmovsxdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00] -; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovsxdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00] -; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovsxdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovsxdq: -; SKX: # %bb.0: -; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmovsxdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sext <4 x i32> %a0 to <4 x i64> - %2 = load <4 x i32>, <4 x i32> *%a1, align 16 - %3 = sext <4 x i32> %2 to <4 x i64> - %4 = add <4 x i64> %1, %3 - ret <4 x i64> %4 -} - -define <8 x i32> @test_pmovsxwd(<8 x i16> %a0, <8 x i16> *%a1) { -; GENERIC-LABEL: test_pmovsxwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00] -; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmovsxwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00] -; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovsxwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00] -; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovsxwd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00] -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovsxwd: -; SKX: # %bb.0: -; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00] -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmovsxwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sext <8 x i16> %a0 to <8 x i32> - %2 = load <8 x i16>, <8 x i16> *%a1, align 16 - %3 = sext <8 x i16> %2 to <8 x i32> - %4 = add <8 x i32> %1, %3 - ret <8 x i32> %4 -} - -define <4 x i64> @test_pmovsxwq(<8 x i16> %a0, <8 x i16> *%a1) { -; GENERIC-LABEL: test_pmovsxwq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] -; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmovsxwq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] -; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovsxwq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] -; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovsxwq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovsxwq: -; SKX: # %bb.0: -; SKX-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmovsxwq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> - %2 = sext <4 x i16> %1 to <4 x i64> - %3 = load <8 x i16>, <8 x i16> *%a1, align 16 - %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> - %5 = sext <4 x i16> %4 to <4 x i64> - %6 = add <4 x i64> %2, %5 - ret <4 x i64> %6 -} - -define <8 x i32> @test_pmovzxbd(<16 x i8> %a0, <16 x i8> *%a1) { -; GENERIC-LABEL: test_pmovzxbd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00] -; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmovzxbd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] -; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] -; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovzxbd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] -; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [9:1.00] -; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovzxbd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] -; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovzxbd: -; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmovzxbd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.50] -; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> - %2 = zext <8 x i8> %1 to <8 x i32> - %3 = load <16 x i8>, <16 x i8> *%a1, align 16 - %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> - %5 = zext <8 x i8> %4 to <8 x i32> - %6 = add <8 x i32> %2, %5 - ret <8 x i32> %6 -} - -define <4 x i64> @test_pmovzxbq(<16 x i8> %a0, <16 x i8> *%a1) { -; GENERIC-LABEL: test_pmovzxbq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] -; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmovzxbq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] -; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] -; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovzxbq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] -; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00] -; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovzxbq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] -; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovzxbq: -; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] -; SKX-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmovzxbq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] -; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> - %2 = zext <4 x i8> %1 to <4 x i64> - %3 = load <16 x i8>, <16 x i8> *%a1, align 16 - %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> - %5 = zext <4 x i8> %4 to <4 x i64> - %6 = add <4 x i64> %2, %5 - ret <4 x i64> %6 -} - -define <16 x i16> @test_pmovzxbw(<16 x i8> %a0, <16 x i8> *%a1) { -; GENERIC-LABEL: test_pmovzxbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00] -; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmovzxbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] -; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] -; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovzxbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] -; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [9:1.00] -; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovzxbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] -; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] -; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovzxbw: -; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] -; SKX-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] -; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmovzxbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:0.50] -; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = zext <16 x i8> %a0 to <16 x i16> - %2 = load <16 x i8>, <16 x i8> *%a1, align 16 - %3 = zext <16 x i8> %2 to <16 x i16> - %4 = add <16 x i16> %1, %3 - ret <16 x i16> %4 -} - -define <4 x i64> @test_pmovzxdq(<4 x i32> %a0, <4 x i32> *%a1) { -; GENERIC-LABEL: test_pmovzxdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00] -; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmovzxdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] -; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] -; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovzxdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] -; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00] -; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovzxdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] -; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovzxdq: -; SKX: # %bb.0: -; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] -; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmovzxdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] -; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = zext <4 x i32> %a0 to <4 x i64> - %2 = load <4 x i32>, <4 x i32> *%a1, align 16 - %3 = zext <4 x i32> %2 to <4 x i64> - %4 = add <4 x i64> %1, %3 - ret <4 x i64> %4 -} - -define <8 x i32> @test_pmovzxwd(<8 x i16> %a0, <8 x i16> *%a1) { -; GENERIC-LABEL: test_pmovzxwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] -; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmovzxwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00] -; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovzxwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] -; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovzxwd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00] -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovzxwd: -; SKX: # %bb.0: -; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00] -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmovzxwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] -; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = zext <8 x i16> %a0 to <8 x i32> - %2 = load <8 x i16>, <8 x i16> *%a1, align 16 - %3 = zext <8 x i16> %2 to <8 x i32> - %4 = add <8 x i32> %1, %3 - ret <8 x i32> %4 -} - -define <4 x i64> @test_pmovzxwq(<8 x i16> %a0, <8 x i16> *%a1) { -; GENERIC-LABEL: test_pmovzxwq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00] -; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmovzxwq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] -; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] -; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovzxwq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] -; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00] -; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovzxwq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] -; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovzxwq: -; SKX: # %bb.0: -; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmovzxwq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] -; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> - %2 = zext <4 x i16> %1 to <4 x i64> - %3 = load <8 x i16>, <8 x i16> *%a1, align 16 - %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> - %5 = zext <4 x i16> %4 to <4 x i64> - %6 = add <4 x i64> %2, %5 - ret <4 x i64> %6 -} - -define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> *%a3) { -; GENERIC-LABEL: test_pmuldq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [12:1.00] -; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmuldq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [12:1.00] -; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmuldq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [11:1.00] -; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmuldq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [11:0.50] -; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmuldq: -; SKX: # %bb.0: -; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [11:0.50] -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmuldq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmuldq (%rdi), %ymm2, %ymm2 # sched: [11:1.00] -; ZNVER1-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpor %ymm2, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) - %2 = load <8 x i32>, <8 x i32> *%a3, align 32 - %3 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a2, <8 x i32> %2) - %4 = or <4 x i64> %1, %3 - ret <4 x i64> %4 -} -declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone - -define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_pmulhrsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmulhrsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmulhrsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmulhrsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmulhrsw: -; SKX: # %bb.0: -; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmulhrsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone - -define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_pmulhuw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmulhuw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmulhuw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmulhuw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmulhuw: -; SKX: # %bb.0: -; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmulhuw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone - -define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_pmulhw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmulhw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmulhw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmulhw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmulhw: -; SKX: # %bb.0: -; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmulhw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone - -define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_pmulld: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmulld: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00] -; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmulld: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00] -; BROADWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [16:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmulld: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; SKYLAKE-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmulld: -; SKX: # %bb.0: -; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmulld: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; ZNVER1-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = mul <8 x i32> %a0, %a1 - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = mul <8 x i32> %1, %2 - ret <8 x i32> %3 -} - -define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_pmullw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmullw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmullw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmullw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmullw: -; SKX: # %bb.0: -; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmullw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = mul <16 x i16> %a0, %a1 - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = mul <16 x i16> %1, %2 - ret <16 x i16> %3 -} - -define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_pmuludq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmuludq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmuludq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmuludq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmuludq: -; SKX: # %bb.0: -; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pmuludq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) - %2 = bitcast <4 x i64> %1 to <8 x i32> - %3 = load <8 x i32>, <8 x i32> *%a2, align 32 - %4 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %2, <8 x i32> %3) - ret <4 x i64> %4 -} -declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone - -define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { -; GENERIC-LABEL: test_por: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_por: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_por: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_por: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_por: -; SKX: # %bb.0: -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_por: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = or <4 x i64> %a0, %a1 - %2 = load <4 x i64>, <4 x i64> *%a2, align 32 - %3 = or <4 x i64> %1, %2 - %4 = add <4 x i64> %3, %a1 - ret <4 x i64> %4 -} - -define <4 x i64> @test_psadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_psadbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psadbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psadbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psadbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psadbw: -; SKX: # %bb.0: -; SKX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psadbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) - %2 = bitcast <4 x i64> %1 to <32 x i8> - %3 = load <32 x i8>, <32 x i8> *%a2, align 32 - %4 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %2, <32 x i8> %3) - ret <4 x i64> %4 -} -declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone - -define <32 x i8> @test_pshufb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_pshufb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pshufb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pshufb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pshufb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pshufb: -; SKX: # %bb.0: -; SKX-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pshufb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) - %2 = load <32 x i8>, <32 x i8> *%a2, align 32 - %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> %2) - ret <32 x i8> %3 -} -declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone - -define <8 x i32> @test_pshufd(<8 x i32> %a0, <8 x i32> *%a1) { -; GENERIC-LABEL: test_pshufd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00] -; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pshufd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; HASWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00] -; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pshufd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; BROADWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [7:1.00] -; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pshufd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00] -; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pshufd: -; SKX: # %bb.0: -; SKX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00] -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pshufd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:0.50] -; ZNVER1-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.25] -; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> - %2 = load <8 x i32>, <8 x i32> *%a1, align 32 - %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> - %4 = add <8 x i32> %1, %3 - ret <8 x i32> %4 -} - -define <16 x i16> @test_pshufhw(<16 x i16> %a0, <16 x i16> *%a1) { -; GENERIC-LABEL: test_pshufhw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00] -; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pshufhw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] -; HASWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00] -; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pshufhw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] -; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [7:1.00] -; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pshufhw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] -; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00] -; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pshufhw: -; SKX: # %bb.0: -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00] -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pshufhw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:0.50] -; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:0.25] -; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> - %2 = load <16 x i16>, <16 x i16> *%a1, align 32 - %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> - %4 = or <16 x i16> %1, %3 - ret <16 x i16> %4 -} - -define <16 x i16> @test_pshuflw(<16 x i16> %a0, <16 x i16> *%a1) { -; GENERIC-LABEL: test_pshuflw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pshuflw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] -; HASWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00] -; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pshuflw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] -; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [7:1.00] -; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pshuflw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] -; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00] -; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pshuflw: -; SKX: # %bb.0: -; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00] -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pshuflw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:0.50] -; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:0.25] -; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> - %2 = load <16 x i16>, <16 x i16> *%a1, align 32 - %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> - %4 = or <16 x i16> %1, %3 - ret <16 x i16> %4 -} - -define <32 x i8> @test_psignb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_psignb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psignb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psignb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psignb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psignb: -; SKX: # %bb.0: -; SKX-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psignb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) - %2 = load <32 x i8>, <32 x i8> *%a2, align 32 - %3 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %1, <32 x i8> %2) - ret <32 x i8> %3 -} -declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone - -define <8 x i32> @test_psignd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_psignd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psignd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psignd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psignd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psignd: -; SKX: # %bb.0: -; SKX-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psignd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %1, <8 x i32> %2) - ret <8 x i32> %3 -} -declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone - -define <16 x i16> @test_psignw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_psignw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psignw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psignw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psignw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psignw: -; SKX: # %bb.0: -; SKX-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psignw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone - -define <8 x i32> @test_pslld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_pslld: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; GENERIC-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pslld: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; HASWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pslld: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pslld: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pslld: -; SKX: # %bb.0: -; SKX-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pslld: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [2:1.00] -; ZNVER1-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; ZNVER1-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %1, <4 x i32> %2) - %4 = shl <8 x i32> %3, - ret <8 x i32> %4 -} -declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone - -define <32 x i8> @test_pslldq(<32 x i8> %a0) { -; GENERIC-LABEL: test_pslldq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pslldq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pslldq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pslldq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pslldq: -; SKX: # %bb.0: -; SKX-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pslldq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a0, <32 x i32> - ret <32 x i8> %1 -} - -define <4 x i64> @test_psllq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_psllq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; GENERIC-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psllq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; HASWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psllq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psllq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psllq: -; SKX: # %bb.0: -; SKX-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psllq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [2:1.00] -; ZNVER1-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; ZNVER1-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %1, <2 x i64> %2) - %4 = shl <4 x i64> %3, - ret <4 x i64> %4 -} -declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone - -define <4 x i32> @test_psllvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_psllvd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psllvd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psllvd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psllvd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psllvd: -; SKX: # %bb.0: -; SKX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psllvd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %1, <4 x i32> %2) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone - -define <8 x i32> @test_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_psllvd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psllvd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; HASWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psllvd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psllvd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psllvd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psllvd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %1, <8 x i32> %2) - ret <8 x i32> %3 -} -declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone - -define <2 x i64> @test_psllvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_psllvq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psllvq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psllvq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psllvq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psllvq: -; SKX: # %bb.0: -; SKX-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psllvq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %1, <2 x i64> %2) - ret <2 x i64> %3 -} -declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone - -define <4 x i64> @test_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { -; GENERIC-LABEL: test_psllvq_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psllvq_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psllvq_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psllvq_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psllvq_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psllvq_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) - %2 = load <4 x i64>, <4 x i64> *%a2, align 32 - %3 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %1, <4 x i64> %2) - ret <4 x i64> %3 -} -declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone - -define <16 x i16> @test_psllw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psllw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; GENERIC-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psllw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; HASWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psllw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psllw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psllw: -; SKX: # %bb.0: -; SKX-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psllw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [2:1.00] -; ZNVER1-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; ZNVER1-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %1, <8 x i16> %2) - %4 = shl <16 x i16> %3, - ret <16 x i16> %4 -} -declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone - -define <8 x i32> @test_psrad(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_psrad: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; GENERIC-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psrad: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; HASWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrad: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrad: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrad: -; SKX: # %bb.0: -; SKX-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psrad: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [2:1.00] -; ZNVER1-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; ZNVER1-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> %2) - %4 = ashr <8 x i32> %3, - ret <8 x i32> %4 -} -declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone - -define <4 x i32> @test_psravd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_psravd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psravd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psravd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psravd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psravd: -; SKX: # %bb.0: -; SKX-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psravd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %1, <4 x i32> %2) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone - -define <8 x i32> @test_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_psravd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psravd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; HASWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psravd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psravd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psravd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psravd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %1, <8 x i32> %2) - ret <8 x i32> %3 -} -declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone - -define <16 x i16> @test_psraw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psraw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; GENERIC-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psraw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; HASWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psraw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psraw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psraw: -; SKX: # %bb.0: -; SKX-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psraw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [2:1.00] -; ZNVER1-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; ZNVER1-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> %2) - %4 = ashr <16 x i16> %3, - ret <16 x i16> %4 -} -declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone - -define <8 x i32> @test_psrld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_psrld: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; GENERIC-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psrld: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; HASWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrld: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrld: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrld: -; SKX: # %bb.0: -; SKX-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psrld: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [2:1.00] -; ZNVER1-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; ZNVER1-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %1, <4 x i32> %2) - %4 = lshr <8 x i32> %3, - ret <8 x i32> %4 -} -declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone - -define <32 x i8> @test_psrldq(<32 x i8> %a0) { -; GENERIC-LABEL: test_psrldq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psrldq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrldq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrldq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrldq: -; SKX: # %bb.0: -; SKX-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psrldq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i32> - ret <32 x i8> %1 -} - -define <4 x i64> @test_psrlq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_psrlq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; GENERIC-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psrlq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; HASWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrlq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrlq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrlq: -; SKX: # %bb.0: -; SKX-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psrlq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [2:1.00] -; ZNVER1-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; ZNVER1-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %1, <2 x i64> %2) - %4 = lshr <4 x i64> %3, - ret <4 x i64> %4 -} -declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone - -define <4 x i32> @test_psrlvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_psrlvd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psrlvd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrlvd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrlvd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrlvd: -; SKX: # %bb.0: -; SKX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psrlvd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %1, <4 x i32> %2) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone - -define <8 x i32> @test_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_psrlvd_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psrlvd_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; HASWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrlvd_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; BROADWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrlvd_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrlvd_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psrlvd_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %1, <8 x i32> %2) - ret <8 x i32> %3 -} -declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone - -define <2 x i64> @test_psrlvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_psrlvq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psrlvq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrlvq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrlvq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrlvq: -; SKX: # %bb.0: -; SKX-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psrlvq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %1, <2 x i64> %2) - ret <2 x i64> %3 -} -declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone - -define <4 x i64> @test_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { -; GENERIC-LABEL: test_psrlvq_ymm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psrlvq_ymm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrlvq_ymm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrlvq_ymm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrlvq_ymm: -; SKX: # %bb.0: -; SKX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psrlvq_ymm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) - %2 = load <4 x i64>, <4 x i64> *%a2, align 32 - %3 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %1, <4 x i64> %2) - ret <4 x i64> %3 -} -declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone - -define <16 x i16> @test_psrlw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psrlw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; GENERIC-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psrlw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; HASWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrlw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; BROADWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrlw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrlw: -; SKX: # %bb.0: -; SKX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psrlw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [2:1.00] -; ZNVER1-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; ZNVER1-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %1, <8 x i16> %2) - %4 = lshr <16 x i16> %3, - ret <16 x i16> %4 -} -declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone - -define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_psubb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubb: -; SKX: # %bb.0: -; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psubb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sub <32 x i8> %a0, %a1 - %2 = load <32 x i8>, <32 x i8> *%a2, align 32 - %3 = sub <32 x i8> %1, %2 - ret <32 x i8> %3 -} - -define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_psubd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubd: -; SKX: # %bb.0: -; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psubd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sub <8 x i32> %a0, %a1 - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = sub <8 x i32> %1, %2 - ret <8 x i32> %3 -} - -define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { -; GENERIC-LABEL: test_psubq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubq: -; SKX: # %bb.0: -; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psubq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sub <4 x i64> %a0, %a1 - %2 = load <4 x i64>, <4 x i64> *%a2, align 32 - %3 = sub <4 x i64> %1, %2 - ret <4 x i64> %3 -} - -define <32 x i8> @test_psubsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_psubsb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubsb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubsb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubsb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubsb: -; SKX: # %bb.0: -; SKX-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psubsb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1) - %2 = load <32 x i8>, <32 x i8> *%a2, align 32 - %3 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %1, <32 x i8> %2) - ret <32 x i8> %3 -} -declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone - -define <16 x i16> @test_psubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_psubsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubsw: -; SKX: # %bb.0: -; SKX-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psubsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone - -define <32 x i8> @test_psubusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_psubusb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubusb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubusb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubusb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubusb: -; SKX: # %bb.0: -; SKX-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psubusb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1) - %2 = load <32 x i8>, <32 x i8> *%a2, align 32 - %3 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %1, <32 x i8> %2) - ret <32 x i8> %3 -} -declare <32 x i8> @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone - -define <16 x i16> @test_psubusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_psubusw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubusw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubusw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubusw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubusw: -; SKX: # %bb.0: -; SKX-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psubusw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1) - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %1, <16 x i16> %2) - ret <16 x i16> %3 -} -declare <16 x i16> @llvm.usub.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone - -define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_psubw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubw: -; SKX: # %bb.0: -; SKX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_psubw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sub <16 x i16> %a0, %a1 - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = sub <16 x i16> %1, %2 - ret <16 x i16> %3 -} - -define <32 x i8> @test_punpckhbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_punpckhbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] -; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_punpckhbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckhbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckhbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckhbw: -; SKX: # %bb.0: -; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] -; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_punpckhbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:0.25] -; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> - %2 = load <32 x i8>, <32 x i8> *%a2, align 32 - %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> - ret <32 x i8> %3 -} - -define <8 x i32> @test_punpckhdq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_punpckhdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] -; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_punpckhdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] -; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckhdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] -; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckhdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckhdq: -; SKX: # %bb.0: -; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] -; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_punpckhdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.25] -; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:0.50] -; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> - %4 = add <8 x i32> %3, - ret <8 x i32> %4 -} - -define <4 x i64> @test_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { -; GENERIC-LABEL: test_punpckhqdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_punpckhqdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckhqdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00] -; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckhqdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckhqdq: -; SKX: # %bb.0: -; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_punpckhqdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.25] -; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> - %2 = load <4 x i64>, <4 x i64> *%a2, align 32 - %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> - %4 = add <4 x i64> %1, %3 - ret <4 x i64> %4 -} - -define <16 x i16> @test_punpckhwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_punpckhwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] -; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_punpckhwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckhwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckhwd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckhwd: -; SKX: # %bb.0: -; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] -; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_punpckhwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:0.25] -; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> - ret <16 x i16> %3 -} - -define <32 x i8> @test_punpcklbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { -; GENERIC-LABEL: test_punpcklbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] -; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_punpcklbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] -; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpcklbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] -; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpcklbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpcklbw: -; SKX: # %bb.0: -; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] -; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_punpcklbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:0.25] -; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> - %2 = load <32 x i8>, <32 x i8> *%a2, align 32 - %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> - ret <32 x i8> %3 -} - -define <8 x i32> @test_punpckldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { -; GENERIC-LABEL: test_punpckldq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] -; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_punpckldq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] -; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckldq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] -; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckldq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckldq: -; SKX: # %bb.0: -; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] -; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_punpckldq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.25] -; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:0.50] -; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> - %2 = load <8 x i32>, <8 x i32> *%a2, align 32 - %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> - %4 = add <8 x i32> %3, - ret <8 x i32> %4 -} - -define <4 x i64> @test_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { -; GENERIC-LABEL: test_punpcklqdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_punpcklqdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpcklqdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00] -; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpcklqdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpcklqdq: -; SKX: # %bb.0: -; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_punpcklqdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.25] -; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> - %2 = load <4 x i64>, <4 x i64> *%a2, align 32 - %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> - %4 = add <4 x i64> %1, %3 - ret <4 x i64> %4 -} - -define <16 x i16> @test_punpcklwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { -; GENERIC-LABEL: test_punpcklwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] -; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_punpcklwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] -; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpcklwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] -; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpcklwd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpcklwd: -; SKX: # %bb.0: -; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] -; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_punpcklwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:0.25] -; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> - %2 = load <16 x i16>, <16 x i16> *%a2, align 32 - %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> - ret <16 x i16> %3 -} - -define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { -; GENERIC-LABEL: test_pxor: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pxor: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pxor: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pxor: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pxor: -; SKX: # %bb.0: -; SKX-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pxor: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = xor <4 x i64> %a0, %a1 - %2 = load <4 x i64>, <4 x i64> *%a2, align 32 - %3 = xor <4 x i64> %1, %2 - %4 = add <4 x i64> %3, %a1 - ret <4 x i64> %4 -} - -!0 = !{i32 1} Index: test/CodeGen/X86/avx512-schedule.ll =================================================================== --- test/CodeGen/X86/avx512-schedule.ll +++ test/CodeGen/X86/avx512-schedule.ll @@ -1,19 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX - -; This test is an assembly of avx512 instructions to check their scheduling +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=SKX define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) { ; GENERIC-LABEL: addpd512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vaddpd %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: addpd512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vaddpd %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq entry: %add.i = fadd <8 x double> %x, %y ret <8 x double> %add.i @@ -22,13 +20,13 @@ define <8 x double> @addpd512fold(<8 x double> %y) { ; GENERIC-LABEL: addpd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: addpd512fold: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: retq entry: %add.i = fadd <8 x double> %y, ret <8 x double> %add.i @@ -37,13 +35,13 @@ define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) { ; GENERIC-LABEL: addps512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vaddps %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: addps512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vaddps %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq entry: %add.i = fadd <16 x float> %x, %y ret <16 x float> %add.i @@ -52,13 +50,13 @@ define <16 x float> @addps512fold(<16 x float> %y) { ; GENERIC-LABEL: addps512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: addps512fold: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: retq entry: %add.i = fadd <16 x float> %y, ret <16 x float> %add.i @@ -67,13 +65,13 @@ define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) { ; GENERIC-LABEL: subpd512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vsubpd %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: subpd512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vsubpd %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq entry: %sub.i = fsub <8 x double> %x, %y ret <8 x double> %sub.i @@ -82,13 +80,13 @@ define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) { ; GENERIC-LABEL: subpd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vsubpd (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: subpd512fold: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vsubpd (%rdi), %zmm0, %zmm0 +; SKX-NEXT: retq entry: %tmp2 = load <8 x double>, <8 x double>* %x, align 8 %sub.i = fsub <8 x double> %y, %tmp2 @@ -98,13 +96,13 @@ define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) { ; GENERIC-LABEL: subps512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vsubps %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: subps512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vsubps %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq entry: %sub.i = fsub <16 x float> %x, %y ret <16 x float> %sub.i @@ -113,13 +111,13 @@ define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) { ; GENERIC-LABEL: subps512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vsubps (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: subps512fold: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vsubps (%rdi), %zmm0, %zmm0 +; SKX-NEXT: retq entry: %tmp2 = load <16 x float>, <16 x float>* %x, align 4 %sub.i = fsub <16 x float> %y, %tmp2 @@ -129,13 +127,13 @@ define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) { ; GENERIC-LABEL: imulq512: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: imulq512: ; SKX: # %bb.0: -; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq %z = mul <8 x i64>%x, %y ret <8 x i64>%z } @@ -143,13 +141,13 @@ define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) { ; GENERIC-LABEL: imulq256: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmullq %ymm0, %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: imulq256: ; SKX: # %bb.0: -; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0 +; SKX-NEXT: retq %z = mul <4 x i64>%x, %y ret <4 x i64>%z } @@ -157,13 +155,13 @@ define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) { ; GENERIC-LABEL: imulq128: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmullq %xmm0, %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: imulq128: ; SKX: # %bb.0: -; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0 +; SKX-NEXT: retq %z = mul <2 x i64>%x, %y ret <2 x i64>%z } @@ -171,13 +169,13 @@ define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) { ; GENERIC-LABEL: mulpd512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmulpd %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mulpd512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmulpd %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq entry: %mul.i = fmul <8 x double> %x, %y ret <8 x double> %mul.i @@ -186,13 +184,13 @@ define <8 x double> @mulpd512fold(<8 x double> %y) { ; GENERIC-LABEL: mulpd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mulpd512fold: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: retq entry: %mul.i = fmul <8 x double> %y, ret <8 x double> %mul.i @@ -201,13 +199,13 @@ define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) { ; GENERIC-LABEL: mulps512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mulps512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq entry: %mul.i = fmul <16 x float> %x, %y ret <16 x float> %mul.i @@ -216,13 +214,13 @@ define <16 x float> @mulps512fold(<16 x float> %y) { ; GENERIC-LABEL: mulps512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mulps512fold: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: retq entry: %mul.i = fmul <16 x float> %y, ret <16 x float> %mul.i @@ -231,13 +229,13 @@ define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) { ; GENERIC-LABEL: divpd512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [45:44.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vdivpd %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: divpd512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [23:16.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vdivpd %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq entry: %div.i = fdiv <8 x double> %x, %y ret <8 x double> %div.i @@ -246,13 +244,13 @@ define <8 x double> @divpd512fold(<8 x double> %y) { ; GENERIC-LABEL: divpd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [52:44.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: divpd512fold: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [30:16.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: retq entry: %div.i = fdiv <8 x double> %y, ret <8 x double> %div.i @@ -261,13 +259,13 @@ define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) { ; GENERIC-LABEL: divps512: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [29:28.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vdivps %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: divps512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [18:10.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vdivps %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq entry: %div.i = fdiv <16 x float> %x, %y ret <16 x float> %div.i @@ -276,13 +274,13 @@ define <16 x float> @divps512fold(<16 x float> %y) { ; GENERIC-LABEL: divps512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [36:28.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: divps512fold: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [25:10.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: retq entry: %div.i = fdiv <16 x float> %y, ret <16 x float> %div.i @@ -291,13 +289,13 @@ define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { ; GENERIC-LABEL: vpaddq_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddq %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpaddq_test: ; SKX: # %bb.0: -; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq %x = add <8 x i64> %i, %j ret <8 x i64> %x } @@ -305,13 +303,13 @@ define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind { ; GENERIC-LABEL: vpaddq_fold_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpaddq_fold_test: ; SKX: # %bb.0: -; SKX-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddq (%rdi), %zmm0, %zmm0 +; SKX-NEXT: retq %tmp = load <8 x i64>, <8 x i64>* %j, align 4 %x = add <8 x i64> %i, %tmp ret <8 x i64> %x @@ -320,13 +318,13 @@ define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind { ; GENERIC-LABEL: vpaddq_broadcast_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpaddq_broadcast_test: ; SKX: # %bb.0: -; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; SKX-NEXT: retq %x = add <8 x i64> %i, ret <8 x i64> %x } @@ -334,13 +332,13 @@ define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind { ; GENERIC-LABEL: vpaddq_broadcast2_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpaddq_broadcast2_test: ; SKX: # %bb.0: -; SKX-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 +; SKX-NEXT: retq %tmp = load i64, i64* %j %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0 %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1 @@ -357,13 +355,13 @@ define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { ; GENERIC-LABEL: vpaddd_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpaddd_test: ; SKX: # %bb.0: -; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq %x = add <16 x i32> %i, %j ret <16 x i32> %x } @@ -371,13 +369,13 @@ define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind { ; GENERIC-LABEL: vpaddd_fold_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpaddd_fold_test: ; SKX: # %bb.0: -; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 +; SKX-NEXT: retq %tmp = load <16 x i32>, <16 x i32>* %j, align 4 %x = add <16 x i32> %i, %tmp ret <16 x i32> %x @@ -386,13 +384,13 @@ define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind { ; GENERIC-LABEL: vpaddd_broadcast_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpaddd_broadcast_test: ; SKX: # %bb.0: -; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; SKX-NEXT: retq %x = add <16 x i32> %i, ret <16 x i32> %x } @@ -400,15 +398,15 @@ define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_mask_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpaddd_mask_test: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = add <16 x i32> %i, %j %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i @@ -418,15 +416,15 @@ define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_maskz_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpaddd_maskz_test: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = add <16 x i32> %i, %j %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer @@ -436,15 +434,15 @@ define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_mask_fold_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpaddd_mask_fold_test: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %j = load <16 x i32>, <16 x i32>* %j.ptr %x = add <16 x i32> %i, %j @@ -455,15 +453,15 @@ define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_mask_broadcast_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpaddd_mask_broadcast_test: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = add <16 x i32> %i, %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i @@ -473,15 +471,15 @@ define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_maskz_fold_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpaddd_maskz_fold_test: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %j = load <16 x i32>, <16 x i32>* %j.ptr %x = add <16 x i32> %i, %j @@ -492,15 +490,15 @@ define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: vpaddd_maskz_broadcast_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpaddd_maskz_broadcast_test: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = add <16 x i32> %i, %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer @@ -510,13 +508,13 @@ define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { ; GENERIC-LABEL: vpsubq_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsubq %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpsubq_test: ; SKX: # %bb.0: -; SKX-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsubq %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq %x = sub <8 x i64> %i, %j ret <8 x i64> %x } @@ -524,13 +522,13 @@ define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { ; GENERIC-LABEL: vpsubd_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsubd %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpsubd_test: ; SKX: # %bb.0: -; SKX-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsubd %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq %x = sub <16 x i32> %i, %j ret <16 x i32> %x } @@ -538,13 +536,13 @@ define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) { ; GENERIC-LABEL: vpmulld_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpmulld_test: ; SKX: # %bb.0: -; SKX-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmulld %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq %x = mul <16 x i32> %i, %j ret <16 x i32> %x } @@ -553,13 +551,13 @@ define float @sqrtA(float %a) nounwind uwtable readnone ssp { ; GENERIC-LABEL: sqrtA: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sqrtA: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; SKX-NEXT: retq entry: %conv1 = tail call float @sqrtf(float %a) nounwind readnone ret float %conv1 @@ -569,13 +567,13 @@ define double @sqrtB(double %a) nounwind uwtable readnone ssp { ; GENERIC-LABEL: sqrtB: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sqrtB: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 +; SKX-NEXT: retq entry: %call = tail call double @sqrt(double %a) nounwind readnone ret double %call @@ -585,13 +583,13 @@ define float @sqrtC(float %a) nounwind { ; GENERIC-LABEL: sqrtC: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sqrtC: ; SKX: # %bb.0: -; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; SKX-NEXT: retq %b = call float @llvm.sqrt.f32(float %a) ret float %b } @@ -600,13 +598,13 @@ define <16 x float> @sqrtD(<16 x float> %a) nounwind { ; GENERIC-LABEL: sqrtD: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vsqrtps %zmm0, %zmm0 # sched: [29:28.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vsqrtps %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sqrtD: ; SKX: # %bb.0: -; SKX-NEXT: vsqrtps %zmm0, %zmm0 # sched: [20:12.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vsqrtps %zmm0, %zmm0 +; SKX-NEXT: retq %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a) ret <16 x float> %b } @@ -615,13 +613,13 @@ define <8 x double> @sqrtE(<8 x double> %a) nounwind { ; GENERIC-LABEL: sqrtE: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [45:44.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vsqrtpd %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sqrtE: ; SKX: # %bb.0: -; SKX-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [32:24.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vsqrtpd %zmm0, %zmm0 +; SKX-NEXT: retq %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a) ret <8 x double> %b } @@ -629,13 +627,13 @@ define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind { ; GENERIC-LABEL: fadd_broadcast: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: fadd_broadcast: ; SKX: # %bb.0: -; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; SKX-NEXT: retq %b = fadd <16 x float> %a, ret <16 x float> %b } @@ -643,13 +641,13 @@ define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind { ; GENERIC-LABEL: addq_broadcast: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: addq_broadcast: ; SKX: # %bb.0: -; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; SKX-NEXT: retq %b = add <8 x i64> %a, ret <8 x i64> %b } @@ -657,13 +655,13 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { ; GENERIC-LABEL: orq_broadcast: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: orq_broadcast: ; SKX: # %bb.0: -; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; SKX-NEXT: retq %b = or <8 x i64> %a, ret <8 x i64> %b } @@ -671,13 +669,13 @@ define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) { ; GENERIC-LABEL: andd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: andd512fold: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0 +; SKX-NEXT: retq entry: %a = load <16 x i32>, <16 x i32>* %x, align 4 %b = and <16 x i32> %y, %a @@ -687,13 +685,13 @@ define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) { ; GENERIC-LABEL: andqbrst: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: andqbrst: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 +; SKX-NEXT: retq entry: %a = load i64, i64* %ap, align 8 %b = insertelement <8 x i64> undef, i64 %a, i32 0 @@ -705,15 +703,15 @@ define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i, ; GENERIC-LABEL: test_mask_vaddps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mask_vaddps: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq <16 x float> %j, <16 x i32> %mask1) nounwind readnone { %mask = icmp ne <16 x i32> %mask1, zeroinitializer @@ -725,15 +723,15 @@ define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vmulps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mask_vmulps: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = fmul <16 x float> %i, %j %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst @@ -743,15 +741,15 @@ define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vminps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mask_vminps: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %cmp_res = fcmp olt <16 x float> %i, %j %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j @@ -762,15 +760,15 @@ define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vminpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mask_vminpd: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <8 x i32> %mask1, zeroinitializer %cmp_res = fcmp olt <8 x double> %i, %j %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j @@ -781,15 +779,15 @@ define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vmaxps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mask_vmaxps: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %cmp_res = fcmp ogt <16 x float> %i, %j %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j @@ -800,15 +798,15 @@ define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vmaxpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mask_vmaxpd: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <8 x i32> %mask1, zeroinitializer %cmp_res = fcmp ogt <8 x double> %i, %j %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j @@ -819,15 +817,15 @@ define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vsubps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mask_vsubps: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = fsub <16 x float> %i, %j %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst @@ -837,15 +835,15 @@ define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vdivps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [29:28.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mask_vdivps: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [18:10.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = fdiv <16 x float> %i, %j %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst @@ -855,15 +853,15 @@ define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone { ; GENERIC-LABEL: test_mask_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mask_vaddpd: ; SKX: # %bb.0: -; SKX-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <8 x i64> %mask1, zeroinitializer %x = fadd <8 x double> %i, %j %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst @@ -873,15 +871,15 @@ define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone { ; GENERIC-LABEL: test_maskz_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_maskz_vaddpd: ; SKX: # %bb.0: -; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = icmp ne <8 x i64> %mask1, zeroinitializer %x = fadd <8 x double> %i, %j %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer @@ -891,15 +889,15 @@ define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind { ; GENERIC-LABEL: test_mask_fold_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mask_fold_vaddpd: ; SKX: # %bb.0: -; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <8 x i64> %mask1, zeroinitializer %tmp = load <8 x double>, <8 x double>* %j, align 8 %x = fadd <8 x double> %i, %tmp @@ -910,15 +908,15 @@ define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind { ; GENERIC-LABEL: test_maskz_fold_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_maskz_fold_vaddpd: ; SKX: # %bb.0: -; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = icmp ne <8 x i64> %mask1, zeroinitializer %tmp = load <8 x double>, <8 x double>* %j, align 8 %x = fadd <8 x double> %i, %tmp @@ -929,13 +927,13 @@ define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind { ; GENERIC-LABEL: test_broadcast_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_broadcast_vaddpd: ; SKX: # %bb.0: -; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 +; SKX-NEXT: retq %tmp = load double, double* %j %b = insertelement <8 x double> undef, double %tmp, i32 0 %c = shufflevector <8 x double> %b, <8 x double> undef, @@ -947,17 +945,17 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j, <8 x i64> %mask1) nounwind { ; GENERIC-LABEL: test_mask_broadcast_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1} # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mask_broadcast_vaddpd: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1} # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <8 x i64> %mask1, zeroinitializer %tmp = load double, double* %j %b = insertelement <8 x double> undef, double %tmp, i32 0 @@ -971,15 +969,15 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j, ; GENERIC-LABEL: test_maskz_broadcast_vaddpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_maskz_broadcast_vaddpd: ; SKX: # %bb.0: -; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq <8 x i64> %mask1) nounwind { %mask = icmp ne <8 x i64> %mask1, zeroinitializer %tmp = load double, double* %j @@ -994,13 +992,13 @@ define <16 x float> @test_fxor(<16 x float> %a) { ; GENERIC-LABEL: test_fxor: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_fxor: ; SKX: # %bb.0: -; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; SKX-NEXT: retq %res = fsub <16 x float> , %a ret <16 x float>%res @@ -1009,13 +1007,13 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) { ; GENERIC-LABEL: test_fxor_8f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_fxor_8f32: ; SKX: # %bb.0: -; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 +; SKX-NEXT: retq %res = fsub <8 x float> , %a ret <8 x float>%res } @@ -1023,13 +1021,13 @@ define <8 x double> @fabs_v8f64(<8 x double> %p) ; GENERIC-LABEL: fabs_v8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: fabs_v8f64: ; SKX: # %bb.0: -; SKX-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; SKX-NEXT: retq { %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p) ret <8 x double> %t @@ -1039,13 +1037,13 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) ; GENERIC-LABEL: fabs_v16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: fabs_v16f32: ; SKX: # %bb.0: -; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; SKX-NEXT: retq { %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p) ret <16 x float> %t @@ -1055,27 +1053,27 @@ define double @test1(double %a, double %b) nounwind { ; GENERIC-LABEL: test1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: jne .LBB64_1 # sched: [1:1.00] -; GENERIC-NEXT: jnp .LBB64_2 # sched: [1:1.00] +; GENERIC-NEXT: vucomisd %xmm1, %xmm0 +; GENERIC-NEXT: jne .LBB64_1 +; GENERIC-NEXT: jnp .LBB64_2 ; GENERIC-NEXT: .LBB64_1: # %l1 -; GENERIC-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vsubsd %xmm1, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; GENERIC-NEXT: .LBB64_2: # %l2 -; GENERIC-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test1: ; SKX: # %bb.0: -; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] -; SKX-NEXT: jne .LBB64_1 # sched: [1:0.50] -; SKX-NEXT: jnp .LBB64_2 # sched: [1:0.50] +; SKX-NEXT: vucomisd %xmm1, %xmm0 +; SKX-NEXT: jne .LBB64_1 +; SKX-NEXT: jnp .LBB64_2 ; SKX-NEXT: .LBB64_1: # %l1 -; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 +; SKX-NEXT: retq ; SKX-NEXT: .LBB64_2: # %l2 -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; SKX-NEXT: retq %tobool = fcmp une double %a, %b br i1 %tobool, label %l1, label %l2 @@ -1090,25 +1088,25 @@ define float @test2(float %a, float %b) nounwind { ; GENERIC-LABEL: test2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00] -; GENERIC-NEXT: jbe .LBB65_2 # sched: [1:1.00] +; GENERIC-NEXT: vucomiss %xmm0, %xmm1 +; GENERIC-NEXT: jbe .LBB65_2 ; GENERIC-NEXT: # %bb.1: # %l1 -; GENERIC-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; GENERIC-NEXT: .LBB65_2: # %l2 -; GENERIC-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vaddss %xmm1, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2: ; SKX: # %bb.0: -; SKX-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00] -; SKX-NEXT: jbe .LBB65_2 # sched: [1:0.50] +; SKX-NEXT: vucomiss %xmm0, %xmm1 +; SKX-NEXT: jbe .LBB65_2 ; SKX-NEXT: # %bb.1: # %l1 -; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; SKX-NEXT: retq ; SKX-NEXT: .LBB65_2: # %l2 -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 +; SKX-NEXT: retq %tobool = fcmp olt float %a, %b br i1 %tobool, label %l1, label %l2 @@ -1123,15 +1121,15 @@ define i32 @test3(float %a, float %b) { ; GENERIC-LABEL: test3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00] -; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcmpeqss %xmm1, %xmm0, %k0 +; GENERIC-NEXT: kmovw %k0, %eax +; GENERIC-NEXT: retq ; ; SKX-LABEL: test3: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: kmovw %k0, %eax # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %k0 +; SKX-NEXT: kmovw %k0, %eax +; SKX-NEXT: retq %cmp10.i = fcmp oeq float %a, %b %conv11.i = zext i1 %cmp10.i to i32 @@ -1141,33 +1139,33 @@ define float @test5(float %p) #0 { ; GENERIC-LABEL: test5: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: jne .LBB67_1 # sched: [1:1.00] -; GENERIC-NEXT: jp .LBB67_1 # sched: [1:1.00] +; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vucomiss %xmm1, %xmm0 +; GENERIC-NEXT: jne .LBB67_1 +; GENERIC-NEXT: jp .LBB67_1 ; GENERIC-NEXT: # %bb.2: # %return -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; GENERIC-NEXT: .LBB67_1: # %if.end -; GENERIC-NEXT: vcmpltss %xmm0, %xmm1, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; GENERIC-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcmpltss %xmm0, %xmm1, %k1 +; GENERIC-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; GENERIC-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test5: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] -; SKX-NEXT: jne .LBB67_1 # sched: [1:0.50] -; SKX-NEXT: jp .LBB67_1 # sched: [1:0.50] +; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vucomiss %xmm1, %xmm0 +; SKX-NEXT: jne .LBB67_1 +; SKX-NEXT: jp .LBB67_1 ; SKX-NEXT: # %bb.2: # %return -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq ; SKX-NEXT: .LBB67_1: # %if.end -; SKX-NEXT: vcmpltss %xmm0, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcmpltss %xmm0, %xmm1, %k1 +; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SKX-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; SKX-NEXT: retq entry: %cmp = fcmp oeq float %p, 0.000000e+00 br i1 %cmp, label %return, label %if.end @@ -1185,17 +1183,17 @@ define i32 @test6(i32 %a, i32 %b) { ; GENERIC-LABEL: test6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: xorl %eax, %eax # sched: [0:0.25] -; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] -; GENERIC-NEXT: sete %al # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: xorl %eax, %eax +; GENERIC-NEXT: cmpl %esi, %edi +; GENERIC-NEXT: sete %al +; GENERIC-NEXT: retq ; ; SKX-LABEL: test6: ; SKX: # %bb.0: -; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] -; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] -; SKX-NEXT: sete %al # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: xorl %eax, %eax +; SKX-NEXT: cmpl %esi, %edi +; SKX-NEXT: sete %al +; SKX-NEXT: retq %cmp = icmp eq i32 %a, %b %res = zext i1 %cmp to i32 ret i32 %res @@ -1204,17 +1202,17 @@ define i32 @test7(double %x, double %y) #2 { ; GENERIC-LABEL: test7: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: xorl %eax, %eax # sched: [0:0.25] -; GENERIC-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: setne %al # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: xorl %eax, %eax +; GENERIC-NEXT: vucomisd %xmm1, %xmm0 +; GENERIC-NEXT: setne %al +; GENERIC-NEXT: retq ; ; SKX-LABEL: test7: ; SKX: # %bb.0: # %entry -; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] -; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] -; SKX-NEXT: setne %al # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: xorl %eax, %eax +; SKX-NEXT: vucomisd %xmm1, %xmm0 +; SKX-NEXT: setne %al +; SKX-NEXT: retq entry: %0 = fcmp one double %x, %y %or = zext i1 %0 to i32 @@ -1225,26 +1223,24 @@ ; GENERIC-LABEL: test8: ; GENERIC: # %bb.0: ; GENERIC-NEXT: xorl $-2147483648, %esi # imm = 0x80000000 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: testl %edx, %edx # sched: [1:0.33] -; GENERIC-NEXT: movl $1, %eax # sched: [1:0.33] -; GENERIC-NEXT: cmovel %eax, %edx # sched: [2:0.67] -; GENERIC-NEXT: notl %edi # sched: [1:0.33] -; GENERIC-NEXT: orl %edi, %esi # sched: [1:0.33] -; GENERIC-NEXT: cmovnel %edx, %eax # sched: [2:0.67] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: testl %edx, %edx +; GENERIC-NEXT: movl $1, %eax +; GENERIC-NEXT: cmovel %eax, %edx +; GENERIC-NEXT: notl %edi +; GENERIC-NEXT: orl %edi, %esi +; GENERIC-NEXT: cmovnel %edx, %eax +; GENERIC-NEXT: retq ; ; SKX-LABEL: test8: ; SKX: # %bb.0: -; SKX-NEXT: notl %edi # sched: [1:0.25] +; SKX-NEXT: notl %edi ; SKX-NEXT: xorl $-2147483648, %esi # imm = 0x80000000 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: testl %edx, %edx # sched: [1:0.25] -; SKX-NEXT: movl $1, %eax # sched: [1:0.25] -; SKX-NEXT: cmovel %eax, %edx # sched: [1:0.50] -; SKX-NEXT: orl %edi, %esi # sched: [1:0.25] -; SKX-NEXT: cmovnel %edx, %eax # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: testl %edx, %edx +; SKX-NEXT: movl $1, %eax +; SKX-NEXT: cmovel %eax, %edx +; SKX-NEXT: orl %edi, %esi +; SKX-NEXT: cmovnel %edx, %eax +; SKX-NEXT: retq %tmp1 = icmp eq i32 %a1, -1 %tmp2 = icmp eq i32 %a2, -2147483648 %tmp3 = and i1 %tmp1, %tmp2 @@ -1257,25 +1253,25 @@ define i32 @test9(i64 %a) { ; GENERIC-LABEL: test9: ; GENERIC: # %bb.0: -; GENERIC-NEXT: testb $1, %dil # sched: [1:0.33] -; GENERIC-NEXT: jne .LBB71_2 # sched: [1:1.00] +; GENERIC-NEXT: testb $1, %dil +; GENERIC-NEXT: jne .LBB71_2 ; GENERIC-NEXT: # %bb.1: # %A -; GENERIC-NEXT: movl $6, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: movl $6, %eax +; GENERIC-NEXT: retq ; GENERIC-NEXT: .LBB71_2: # %B -; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: movl $7, %eax +; GENERIC-NEXT: retq ; ; SKX-LABEL: test9: ; SKX: # %bb.0: -; SKX-NEXT: testb $1, %dil # sched: [1:0.25] -; SKX-NEXT: jne .LBB71_2 # sched: [1:0.50] +; SKX-NEXT: testb $1, %dil +; SKX-NEXT: jne .LBB71_2 ; SKX-NEXT: # %bb.1: # %A -; SKX-NEXT: movl $6, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: movl $6, %eax +; SKX-NEXT: retq ; SKX-NEXT: .LBB71_2: # %B -; SKX-NEXT: movl $7, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: movl $7, %eax +; SKX-NEXT: retq %b = and i64 %a, 1 %cmp10.i = icmp eq i64 %b, 0 br i1 %cmp10.i, label %A, label %B @@ -1288,37 +1284,37 @@ define i32 @test10(i64 %b, i64 %c, i1 %d) { ; GENERIC-LABEL: test10: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %edx, %eax # sched: [1:0.33] -; GENERIC-NEXT: andb $1, %al # sched: [1:0.33] -; GENERIC-NEXT: cmpq %rsi, %rdi # sched: [1:0.33] -; GENERIC-NEXT: sete %cl # sched: [1:0.50] -; GENERIC-NEXT: orb %dl, %cl # sched: [1:0.33] -; GENERIC-NEXT: andb $1, %cl # sched: [1:0.33] -; GENERIC-NEXT: cmpb %cl, %al # sched: [1:0.33] -; GENERIC-NEXT: je .LBB72_1 # sched: [1:1.00] +; GENERIC-NEXT: movl %edx, %eax +; GENERIC-NEXT: andb $1, %al +; GENERIC-NEXT: cmpq %rsi, %rdi +; GENERIC-NEXT: sete %cl +; GENERIC-NEXT: orb %dl, %cl +; GENERIC-NEXT: andb $1, %cl +; GENERIC-NEXT: cmpb %cl, %al +; GENERIC-NEXT: je .LBB72_1 ; GENERIC-NEXT: # %bb.2: # %if.end.i -; GENERIC-NEXT: movl $6, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: movl $6, %eax +; GENERIC-NEXT: retq ; GENERIC-NEXT: .LBB72_1: # %if.then.i -; GENERIC-NEXT: movl $5, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: movl $5, %eax +; GENERIC-NEXT: retq ; ; SKX-LABEL: test10: ; SKX: # %bb.0: -; SKX-NEXT: movl %edx, %eax # sched: [1:0.25] -; SKX-NEXT: andb $1, %al # sched: [1:0.25] -; SKX-NEXT: cmpq %rsi, %rdi # sched: [1:0.25] -; SKX-NEXT: sete %cl # sched: [1:0.50] -; SKX-NEXT: orb %dl, %cl # sched: [1:0.25] -; SKX-NEXT: andb $1, %cl # sched: [1:0.25] -; SKX-NEXT: cmpb %cl, %al # sched: [1:0.25] -; SKX-NEXT: je .LBB72_1 # sched: [1:0.50] +; SKX-NEXT: movl %edx, %eax +; SKX-NEXT: andb $1, %al +; SKX-NEXT: cmpq %rsi, %rdi +; SKX-NEXT: sete %cl +; SKX-NEXT: orb %dl, %cl +; SKX-NEXT: andb $1, %cl +; SKX-NEXT: cmpb %cl, %al +; SKX-NEXT: je .LBB72_1 ; SKX-NEXT: # %bb.2: # %if.end.i -; SKX-NEXT: movl $6, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: movl $6, %eax +; SKX-NEXT: retq ; SKX-NEXT: .LBB72_1: # %if.then.i -; SKX-NEXT: movl $5, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: movl $5, %eax +; SKX-NEXT: retq %cmp8.i = icmp eq i64 %b, %c %or1 = or i1 %d, %cmp8.i @@ -1335,13 +1331,13 @@ define <16 x float> @sitof32(<16 x i32> %a) nounwind { ; GENERIC-LABEL: sitof32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sitof32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 +; SKX-NEXT: retq %b = sitofp <16 x i32> %a to <16 x float> ret <16 x float> %b } @@ -1349,13 +1345,13 @@ define <8 x double> @sltof864(<8 x i64> %a) { ; GENERIC-LABEL: sltof864: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sltof864: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 +; SKX-NEXT: retq %b = sitofp <8 x i64> %a to <8 x double> ret <8 x double> %b } @@ -1363,13 +1359,13 @@ define <4 x double> @slto4f64(<4 x i64> %a) { ; GENERIC-LABEL: slto4f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtqq2pd %ymm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: slto4f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtqq2pd %ymm0, %ymm0 +; SKX-NEXT: retq %b = sitofp <4 x i64> %a to <4 x double> ret <4 x double> %b } @@ -1377,13 +1373,13 @@ define <2 x double> @slto2f64(<2 x i64> %a) { ; GENERIC-LABEL: slto2f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: slto2f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 +; SKX-NEXT: retq %b = sitofp <2 x i64> %a to <2 x double> ret <2 x double> %b } @@ -1391,13 +1387,13 @@ define <2 x float> @sltof2f32(<2 x i64> %a) { ; GENERIC-LABEL: sltof2f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2ps %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtqq2ps %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sltof2f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2ps %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtqq2ps %xmm0, %xmm0 +; SKX-NEXT: retq %b = sitofp <2 x i64> %a to <2 x float> ret <2 x float>%b } @@ -1405,13 +1401,13 @@ define <4 x float> @slto4f32_mem(<4 x i64>* %a) { ; GENERIC-LABEL: slto4f32_mem: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2psy (%rdi), %xmm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtqq2psy (%rdi), %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: slto4f32_mem: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2psy (%rdi), %xmm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtqq2psy (%rdi), %xmm0 +; SKX-NEXT: retq %a1 = load <4 x i64>, <4 x i64>* %a, align 8 %b = sitofp <4 x i64> %a1 to <4 x float> ret <4 x float>%b @@ -1420,13 +1416,13 @@ define <4 x i64> @f64to4sl(<4 x double> %a) { ; GENERIC-LABEL: f64to4sl: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttpd2qq %ymm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: f64to4sl: ; SKX: # %bb.0: -; SKX-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttpd2qq %ymm0, %ymm0 +; SKX-NEXT: retq %b = fptosi <4 x double> %a to <4 x i64> ret <4 x i64> %b } @@ -1434,13 +1430,13 @@ define <4 x i64> @f32to4sl(<4 x float> %a) { ; GENERIC-LABEL: f32to4sl: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttps2qq %xmm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttps2qq %xmm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: f32to4sl: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2qq %xmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttps2qq %xmm0, %ymm0 +; SKX-NEXT: retq %b = fptosi <4 x float> %a to <4 x i64> ret <4 x i64> %b } @@ -1448,15 +1444,15 @@ define <4 x float> @slto4f32(<4 x i64> %a) { ; GENERIC-LABEL: slto4f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtqq2ps %ymm0, %xmm0 +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: slto4f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtqq2ps %ymm0, %xmm0 +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %b = sitofp <4 x i64> %a to <4 x float> ret <4 x float> %b } @@ -1464,15 +1460,15 @@ define <4 x float> @ulto4f32(<4 x i64> %a) { ; GENERIC-LABEL: ulto4f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtuqq2ps %ymm0, %xmm0 +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: ulto4f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtuqq2ps %ymm0, %xmm0 +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %b = uitofp <4 x i64> %a to <4 x float> ret <4 x float> %b } @@ -1480,13 +1476,13 @@ define <8 x double> @ulto8f64(<8 x i64> %a) { ; GENERIC-LABEL: ulto8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ulto8f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 +; SKX-NEXT: retq %b = uitofp <8 x i64> %a to <8 x double> ret <8 x double> %b } @@ -1494,15 +1490,15 @@ define <16 x double> @ulto16f64(<16 x i64> %a) { ; GENERIC-LABEL: ulto16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 +; GENERIC-NEXT: vcvtuqq2pd %zmm1, %zmm1 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ulto16f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 +; SKX-NEXT: vcvtuqq2pd %zmm1, %zmm1 +; SKX-NEXT: retq %b = uitofp <16 x i64> %a to <16 x double> ret <16 x double> %b } @@ -1510,13 +1506,13 @@ define <16 x i32> @f64to16si(<16 x float> %a) nounwind { ; GENERIC-LABEL: f64to16si: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: f64to16si: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 +; SKX-NEXT: retq %b = fptosi <16 x float> %a to <16 x i32> ret <16 x i32> %b } @@ -1524,13 +1520,13 @@ define <16 x i32> @f32to16ui(<16 x float> %a) nounwind { ; GENERIC-LABEL: f32to16ui: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: f32to16ui: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 +; SKX-NEXT: retq %b = fptoui <16 x float> %a to <16 x i32> ret <16 x i32> %b } @@ -1538,17 +1534,17 @@ define <16 x i8> @f32to16uc(<16 x float> %f) { ; GENERIC-LABEL: f32to16uc: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: vpmovdb %zmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 +; GENERIC-NEXT: vpmovdb %zmm0, %xmm0 +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: f32to16uc: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 +; SKX-NEXT: vpmovdb %zmm0, %xmm0 +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %res = fptoui <16 x float> %f to <16 x i8> ret <16 x i8> %res } @@ -1556,15 +1552,15 @@ define <16 x i16> @f32to16us(<16 x float> %f) { ; GENERIC-LABEL: f32to16us: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: vpmovdw %zmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 +; GENERIC-NEXT: vpmovdw %zmm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: f32to16us: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 +; SKX-NEXT: vpmovdw %zmm0, %ymm0 +; SKX-NEXT: retq %res = fptoui <16 x float> %f to <16 x i16> ret <16 x i16> %res } @@ -1572,13 +1568,13 @@ define <8 x i32> @f32to8ui(<8 x float> %a) nounwind { ; GENERIC-LABEL: f32to8ui: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttps2udq %ymm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: f32to8ui: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttps2udq %ymm0, %ymm0 +; SKX-NEXT: retq %b = fptoui <8 x float> %a to <8 x i32> ret <8 x i32> %b } @@ -1586,13 +1582,13 @@ define <4 x i32> @f32to4ui(<4 x float> %a) nounwind { ; GENERIC-LABEL: f32to4ui: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttps2udq %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: f32to4ui: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttps2udq %xmm0, %xmm0 +; SKX-NEXT: retq %b = fptoui <4 x float> %a to <4 x i32> ret <4 x i32> %b } @@ -1600,13 +1596,13 @@ define <8 x i32> @f64to8ui(<8 x double> %a) nounwind { ; GENERIC-LABEL: f64to8ui: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttpd2udq %zmm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: f64to8ui: ; SKX: # %bb.0: -; SKX-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttpd2udq %zmm0, %ymm0 +; SKX-NEXT: retq %b = fptoui <8 x double> %a to <8 x i32> ret <8 x i32> %b } @@ -1614,17 +1610,17 @@ define <8 x i16> @f64to8us(<8 x double> %f) { ; GENERIC-LABEL: f64to8us: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 +; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: f64to8us: ; SKX: # %bb.0: -; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 +; SKX-NEXT: vpmovdw %ymm0, %xmm0 +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %res = fptoui <8 x double> %f to <8 x i16> ret <8 x i16> %res } @@ -1632,17 +1628,17 @@ define <8 x i8> @f64to8uc(<8 x double> %f) { ; GENERIC-LABEL: f64to8uc: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 +; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: f64to8uc: ; SKX: # %bb.0: -; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 +; SKX-NEXT: vpmovdw %ymm0, %xmm0 +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %res = fptoui <8 x double> %f to <8 x i8> ret <8 x i8> %res } @@ -1650,15 +1646,15 @@ define <4 x i32> @f64to4ui(<4 x double> %a) nounwind { ; GENERIC-LABEL: f64to4ui: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttpd2udq %ymm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttpd2udq %ymm0, %xmm0 +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: f64to4ui: ; SKX: # %bb.0: -; SKX-NEXT: vcvttpd2udq %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttpd2udq %ymm0, %xmm0 +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %b = fptoui <4 x double> %a to <4 x i32> ret <4 x i32> %b } @@ -1666,28 +1662,28 @@ define <8 x double> @sito8f64(<8 x i32> %a) { ; GENERIC-LABEL: sito8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sito8f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 +; SKX-NEXT: retq %b = sitofp <8 x i32> %a to <8 x double> ret <8 x double> %b } define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind { ; GENERIC-LABEL: i32to8f64_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: i32to8f64_mask: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} +; SKX-NEXT: retq ; VLNOBW-LABEL: i32to8f64_mask: ; VLNOBW: # %bb.0: ; VLNOBW-NEXT: kmovw %edi, %k1 @@ -1701,15 +1697,15 @@ define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { ; GENERIC-LABEL: sito8f64_maskz: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sito8f64_maskz: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq ; VLNOBW-LABEL: sito8f64_maskz: ; VLNOBW: # %bb.0: ; VLNOBW-NEXT: kmovw %edi, %k1 @@ -1724,13 +1720,13 @@ define <8 x i32> @f64to8si(<8 x double> %a) { ; GENERIC-LABEL: f64to8si: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: f64to8si: ; SKX: # %bb.0: -; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 +; SKX-NEXT: retq %b = fptosi <8 x double> %a to <8 x i32> ret <8 x i32> %b } @@ -1738,15 +1734,15 @@ define <4 x i32> @f64to4si(<4 x double> %a) { ; GENERIC-LABEL: f64to4si: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttpd2dq %ymm0, %xmm0 +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: f64to4si: ; SKX: # %bb.0: -; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %b = fptosi <4 x double> %a to <4 x i32> ret <4 x i32> %b } @@ -1754,17 +1750,17 @@ define <16 x float> @f64to16f32(<16 x double> %b) nounwind { ; GENERIC-LABEL: f64to16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [4:1.00] -; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 +; GENERIC-NEXT: vcvtpd2ps %zmm1, %ymm1 +; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: f64to16f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [7:1.00] -; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtpd2ps %zmm0, %ymm0 +; SKX-NEXT: vcvtpd2ps %zmm1, %ymm1 +; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; SKX-NEXT: retq %a = fptrunc <16 x double> %b to <16 x float> ret <16 x float> %a } @@ -1772,15 +1768,15 @@ define <4 x float> @f64to4f32(<4 x double> %b) { ; GENERIC-LABEL: f64to4f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: f64to4f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %a = fptrunc <4 x double> %b to <4 x float> ret <4 x float> %a } @@ -1788,19 +1784,19 @@ define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) { ; GENERIC-LABEL: f64to4f32_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [4:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 +; GENERIC-NEXT: vpmovd2m %xmm1, %k1 +; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: f64to4f32_mask: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:1.00] -; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [7:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpslld $31, %xmm1, %xmm1 +; SKX-NEXT: vpmovd2m %xmm1, %k1 +; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %a = fptrunc <4 x double> %b to <4 x float> %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer ret <4 x float> %c @@ -1809,13 +1805,13 @@ define <4 x float> @f64tof32_inreg(<2 x double> %a0, <4 x float> %a1) nounwind { ; GENERIC-LABEL: f64tof32_inreg: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: f64tof32_inreg: ; SKX: # %bb.0: -; SKX-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 +; SKX-NEXT: retq %ext = extractelement <2 x double> %a0, i32 0 %cvt = fptrunc double %ext to float %res = insertelement <4 x float> %a1, float %cvt, i32 0 @@ -1825,13 +1821,13 @@ define <8 x double> @f32to8f64(<8 x float> %b) nounwind { ; GENERIC-LABEL: f32to8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: f32to8f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtps2pd %ymm0, %zmm0 +; SKX-NEXT: retq %a = fpext <8 x float> %b to <8 x double> ret <8 x double> %a } @@ -1839,15 +1835,15 @@ define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) { ; GENERIC-LABEL: f32to4f64_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcmpltpd %ymm2, %ymm1, %k1 +; GENERIC-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: f32to4f64_mask: ; SKX: # %bb.0: -; SKX-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcmpltpd %ymm2, %ymm1, %k1 +; SKX-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} +; SKX-NEXT: retq %a = fpext <4 x float> %b to <4 x double> %mask = fcmp ogt <4 x double> %a1, %b1 %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer @@ -1857,13 +1853,13 @@ define <2 x double> @f32tof64_inreg(<2 x double> %a0, <4 x float> %a1) nounwind { ; GENERIC-LABEL: f32tof64_inreg: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: f32tof64_inreg: ; SKX: # %bb.0: -; SKX-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 +; SKX-NEXT: retq %ext = extractelement <4 x float> %a1, i32 0 %cvt = fpext float %ext to double %res = insertelement <2 x double> %a0, double %cvt, i32 0 @@ -1873,13 +1869,13 @@ define double @sltof64_load(i64* nocapture %e) { ; GENERIC-LABEL: sltof64_load: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sltof64_load: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 +; SKX-NEXT: retq entry: %tmp1 = load i64, i64* %e, align 8 %conv = sitofp i64 %tmp1 to double @@ -1889,13 +1885,13 @@ define double @sitof64_load(i32* %e) { ; GENERIC-LABEL: sitof64_load: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sitof64_load: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 +; SKX-NEXT: retq entry: %tmp1 = load i32, i32* %e, align 4 %conv = sitofp i32 %tmp1 to double @@ -1905,13 +1901,13 @@ define float @sitof32_load(i32* %e) { ; GENERIC-LABEL: sitof32_load: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sitof32_load: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 +; SKX-NEXT: retq entry: %tmp1 = load i32, i32* %e, align 4 %conv = sitofp i32 %tmp1 to float @@ -1921,13 +1917,13 @@ define float @sltof32_load(i64* %e) { ; GENERIC-LABEL: sltof32_load: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sltof32_load: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 +; SKX-NEXT: retq entry: %tmp1 = load i64, i64* %e, align 8 %conv = sitofp i64 %tmp1 to float @@ -1937,17 +1933,17 @@ define void @f32tof64_loadstore() { ; GENERIC-LABEL: f32tof64_loadstore: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; GENERIC-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; GENERIC-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 +; GENERIC-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) +; GENERIC-NEXT: retq ; ; SKX-LABEL: f32tof64_loadstore: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 +; SKX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) +; SKX-NEXT: retq entry: %f = alloca float, align 4 %d = alloca double, align 8 @@ -1960,17 +1956,17 @@ define void @f64tof32_loadstore() nounwind uwtable { ; GENERIC-LABEL: f64tof32_loadstore: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] -; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 +; GENERIC-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) +; GENERIC-NEXT: retq ; ; SKX-LABEL: f64tof32_loadstore: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 +; SKX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) +; SKX-NEXT: retq entry: %f = alloca float, align 4 %d = alloca double, align 8 @@ -1983,13 +1979,13 @@ define double @long_to_double(i64 %x) { ; GENERIC-LABEL: long_to_double: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovq %rdi, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: long_to_double: ; SKX: # %bb.0: -; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovq %rdi, %xmm0 +; SKX-NEXT: retq %res = bitcast i64 %x to double ret double %res } @@ -1997,13 +1993,13 @@ define i64 @double_to_long(double %x) { ; GENERIC-LABEL: double_to_long: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovq %xmm0, %rax +; GENERIC-NEXT: retq ; ; SKX-LABEL: double_to_long: ; SKX: # %bb.0: -; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovq %xmm0, %rax +; SKX-NEXT: retq %res = bitcast double %x to i64 ret i64 %res } @@ -2011,13 +2007,13 @@ define float @int_to_float(i32 %x) { ; GENERIC-LABEL: int_to_float: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovd %edi, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: int_to_float: ; SKX: # %bb.0: -; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovd %edi, %xmm0 +; SKX-NEXT: retq %res = bitcast i32 %x to float ret float %res } @@ -2025,13 +2021,13 @@ define i32 @float_to_int(float %x) { ; GENERIC-LABEL: float_to_int: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovd %xmm0, %eax +; GENERIC-NEXT: retq ; ; SKX-LABEL: float_to_int: ; SKX: # %bb.0: -; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovd %xmm0, %eax +; SKX-NEXT: retq %res = bitcast float %x to i32 ret i32 %res } @@ -2039,19 +2035,19 @@ define <16 x double> @uito16f64(<16 x i32> %a) nounwind { ; GENERIC-LABEL: uito16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm2 # sched: [4:1.00] -; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [4:1.00] -; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm2 +; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 +; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm1 +; GENERIC-NEXT: vmovaps %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: uito16f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtudq2pd %ymm0, %zmm2 # sched: [7:1.00] -; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [7:1.00] -; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtudq2pd %ymm0, %zmm2 +; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 +; SKX-NEXT: vcvtudq2pd %ymm0, %zmm1 +; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: retq %b = uitofp <16 x i32> %a to <16 x double> ret <16 x double> %b } @@ -2059,13 +2055,13 @@ define <8 x float> @slto8f32(<8 x i64> %a) { ; GENERIC-LABEL: slto8f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: slto8f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtqq2ps %zmm0, %ymm0 +; SKX-NEXT: retq %b = sitofp <8 x i64> %a to <8 x float> ret <8 x float> %b } @@ -2073,17 +2069,17 @@ define <16 x float> @slto16f32(<16 x i64> %a) { ; GENERIC-LABEL: slto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [3:1.00] -; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 +; GENERIC-NEXT: vcvtqq2ps %zmm1, %ymm1 +; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: slto16f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [7:1.00] -; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtqq2ps %zmm0, %ymm0 +; SKX-NEXT: vcvtqq2ps %zmm1, %ymm1 +; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; SKX-NEXT: retq %b = sitofp <16 x i64> %a to <16 x float> ret <16 x float> %b } @@ -2091,13 +2087,13 @@ define <8 x double> @slto8f64(<8 x i64> %a) { ; GENERIC-LABEL: slto8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: slto8f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 +; SKX-NEXT: retq %b = sitofp <8 x i64> %a to <8 x double> ret <8 x double> %b } @@ -2105,15 +2101,15 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; GENERIC-LABEL: slto16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 +; GENERIC-NEXT: vcvtqq2pd %zmm1, %zmm1 +; GENERIC-NEXT: retq ; ; SKX-LABEL: slto16f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 +; SKX-NEXT: vcvtqq2pd %zmm1, %zmm1 +; SKX-NEXT: retq %b = sitofp <16 x i64> %a to <16 x double> ret <16 x double> %b } @@ -2121,13 +2117,13 @@ define <8 x float> @ulto8f32(<8 x i64> %a) { ; GENERIC-LABEL: ulto8f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ulto8f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtuqq2ps %zmm0, %ymm0 +; SKX-NEXT: retq %b = uitofp <8 x i64> %a to <8 x float> ret <8 x float> %b } @@ -2135,17 +2131,17 @@ define <16 x float> @ulto16f32(<16 x i64> %a) { ; GENERIC-LABEL: ulto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [3:1.00] -; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 +; GENERIC-NEXT: vcvtuqq2ps %zmm1, %ymm1 +; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ulto16f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [7:1.00] -; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtuqq2ps %zmm0, %ymm0 +; SKX-NEXT: vcvtuqq2ps %zmm1, %ymm1 +; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; SKX-NEXT: retq %b = uitofp <16 x i64> %a to <16 x float> ret <16 x float> %b } @@ -2153,15 +2149,15 @@ define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind { ; GENERIC-LABEL: uito8f64_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: uito8f64_mask: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} +; SKX-NEXT: retq ; VLNOBW-LABEL: uito8f64_mask: ; VLNOBW: # %bb.0: ; VLNOBW-NEXT: kmovw %edi, %k1 @@ -2175,15 +2171,15 @@ define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { ; GENERIC-LABEL: uito8f64_maskz: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: uito8f64_maskz: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq %1 = bitcast i8 %b to <8 x i1> %2 = uitofp <8 x i32> %a to <8 x double> %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer @@ -2193,13 +2189,13 @@ define <4 x double> @uito4f64(<4 x i32> %a) nounwind { ; GENERIC-LABEL: uito4f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtudq2pd %xmm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: uito4f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtudq2pd %xmm0, %ymm0 +; SKX-NEXT: retq %b = uitofp <4 x i32> %a to <4 x double> ret <4 x double> %b } @@ -2207,13 +2203,13 @@ define <16 x float> @uito16f32(<16 x i32> %a) nounwind { ; GENERIC-LABEL: uito16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtudq2ps %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: uito16f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtudq2ps %zmm0, %zmm0 +; SKX-NEXT: retq %b = uitofp <16 x i32> %a to <16 x float> ret <16 x float> %b } @@ -2221,13 +2217,13 @@ define <8 x double> @uito8f64(<8 x i32> %a) { ; GENERIC-LABEL: uito8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: uito8f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 +; SKX-NEXT: retq %b = uitofp <8 x i32> %a to <8 x double> ret <8 x double> %b } @@ -2235,13 +2231,13 @@ define <8 x float> @uito8f32(<8 x i32> %a) nounwind { ; GENERIC-LABEL: uito8f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtudq2ps %ymm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: uito8f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0 +; SKX-NEXT: retq %b = uitofp <8 x i32> %a to <8 x float> ret <8 x float> %b } @@ -2249,13 +2245,13 @@ define <4 x float> @uito4f32(<4 x i32> %a) nounwind { ; GENERIC-LABEL: uito4f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtudq2ps %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: uito4f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 +; SKX-NEXT: retq %b = uitofp <4 x i32> %a to <4 x float> ret <4 x float> %b } @@ -2263,13 +2259,13 @@ define i32 @fptosi(float %a) nounwind { ; GENERIC-LABEL: fptosi: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttss2si %xmm0, %eax # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttss2si %xmm0, %eax +; GENERIC-NEXT: retq ; ; SKX-LABEL: fptosi: ; SKX: # %bb.0: -; SKX-NEXT: vcvttss2si %xmm0, %eax # sched: [6:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttss2si %xmm0, %eax +; SKX-NEXT: retq %b = fptosi float %a to i32 ret i32 %b } @@ -2277,13 +2273,13 @@ define i32 @fptoui(float %a) nounwind { ; GENERIC-LABEL: fptoui: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvttss2usi %xmm0, %eax # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvttss2usi %xmm0, %eax +; GENERIC-NEXT: retq ; ; SKX-LABEL: fptoui: ; SKX: # %bb.0: -; SKX-NEXT: vcvttss2usi %xmm0, %eax # sched: [6:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvttss2usi %xmm0, %eax +; SKX-NEXT: retq %b = fptoui float %a to i32 ret i32 %b } @@ -2291,13 +2287,13 @@ define float @uitof32(i32 %a) nounwind { ; GENERIC-LABEL: uitof32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: uitof32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 +; SKX-NEXT: retq %b = uitofp i32 %a to float ret float %b } @@ -2305,13 +2301,13 @@ define double @uitof64(i32 %a) nounwind { ; GENERIC-LABEL: uitof64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: uitof64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 +; SKX-NEXT: retq %b = uitofp i32 %a to double ret double %b } @@ -2319,17 +2315,17 @@ define <16 x float> @sbto16f32(<16 x i32> %a) { ; GENERIC-LABEL: sbto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %zmm0, %k0 +; GENERIC-NEXT: vpmovm2d %k0, %zmm0 +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sbto16f32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovd2m %zmm0, %k0 +; SKX-NEXT: vpmovm2d %k0, %zmm0 +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 +; SKX-NEXT: retq %mask = icmp slt <16 x i32> %a, zeroinitializer %1 = sitofp <16 x i1> %mask to <16 x float> ret <16 x float> %1 @@ -2338,15 +2334,15 @@ define <16 x float> @scto16f32(<16 x i8> %a) { ; GENERIC-LABEL: scto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: scto16f32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 +; SKX-NEXT: retq %1 = sitofp <16 x i8> %a to <16 x float> ret <16 x float> %1 } @@ -2354,15 +2350,15 @@ define <16 x float> @ssto16f32(<16 x i16> %a) { ; GENERIC-LABEL: ssto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ssto16f32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 +; SKX-NEXT: retq %1 = sitofp <16 x i16> %a to <16 x float> ret <16 x float> %1 } @@ -2370,15 +2366,15 @@ define <8 x double> @ssto16f64(<8 x i16> %a) { ; GENERIC-LABEL: ssto16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ssto16f64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 +; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 +; SKX-NEXT: retq %1 = sitofp <8 x i16> %a to <8 x double> ret <8 x double> %1 } @@ -2386,19 +2382,19 @@ define <8 x double> @scto8f64(<8 x i8> %a) { ; GENERIC-LABEL: scto8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; GENERIC-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; GENERIC-NEXT: vpslld $24, %ymm0, %ymm0 +; GENERIC-NEXT: vpsrad $24, %ymm0, %ymm0 +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: scto8f64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; SKX-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SKX-NEXT: vpslld $24, %ymm0, %ymm0 +; SKX-NEXT: vpsrad $24, %ymm0, %ymm0 +; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 +; SKX-NEXT: retq %1 = sitofp <8 x i8> %a to <8 x double> ret <8 x double> %1 } @@ -2406,19 +2402,19 @@ define <16 x double> @scto16f64(<16 x i8> %a) { ; GENERIC-LABEL: scto16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm1 +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 +; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 +; GENERIC-NEXT: retq ; ; SKX-LABEL: scto16f64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxbd %xmm0, %zmm1 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] -; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxbd %xmm0, %zmm1 +; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 +; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 +; SKX-NEXT: retq %b = sitofp <16 x i8> %a to <16 x double> ret <16 x double> %b } @@ -2426,27 +2422,27 @@ define <16 x double> @sbto16f64(<16 x double> %a) { ; GENERIC-LABEL: sbto16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [0:0.25] -; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00] -; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00] -; GENERIC-NEXT: kunpckbw %k0, %k1, %k0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovm2d %k0, %zmm1 # sched: [1:0.33] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k0 +; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k1 +; GENERIC-NEXT: kunpckbw %k0, %k1, %k0 +; GENERIC-NEXT: vpmovm2d %k0, %zmm1 +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 +; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sbto16f64: ; SKX: # %bb.0: -; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33] -; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00] -; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: kunpckbw %k0, %k1, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2d %k0, %zmm1 # sched: [1:0.25] -; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] -; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k0 +; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k1 +; SKX-NEXT: kunpckbw %k0, %k1, %k0 +; SKX-NEXT: vpmovm2d %k0, %zmm1 +; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 +; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 +; SKX-NEXT: retq %cmpres = fcmp ogt <16 x double> %a, zeroinitializer %1 = sitofp <16 x i1> %cmpres to <16 x double> ret <16 x double> %1 @@ -2455,19 +2451,19 @@ define <8 x double> @sbto8f64(<8 x double> %a) { ; GENERIC-LABEL: sbto8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00] -; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k0 +; GENERIC-NEXT: vpmovm2d %k0, %ymm0 +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sbto8f64: ; SKX: # %bb.0: -; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25] -; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k0 +; SKX-NEXT: vpmovm2d %k0, %ymm0 +; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 +; SKX-NEXT: retq %cmpres = fcmp ogt <8 x double> %a, zeroinitializer %1 = sitofp <8 x i1> %cmpres to <8 x double> ret <8 x double> %1 @@ -2476,17 +2472,17 @@ define <8 x float> @sbto8f32(<8 x float> %a) { ; GENERIC-LABEL: sbto8f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 +; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sbto8f32: ; SKX: # %bb.0: -; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 +; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 +; SKX-NEXT: retq %cmpres = fcmp ogt <8 x float> %a, zeroinitializer %1 = sitofp <8 x i1> %cmpres to <8 x float> ret <8 x float> %1 @@ -2495,17 +2491,17 @@ define <4 x float> @sbto4f32(<4 x float> %a) { ; GENERIC-LABEL: sbto4f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sbto4f32: ; SKX: # %bb.0: -; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; SKX-NEXT: retq %cmpres = fcmp ogt <4 x float> %a, zeroinitializer %1 = sitofp <4 x i1> %cmpres to <4 x float> ret <4 x float> %1 @@ -2514,19 +2510,19 @@ define <4 x double> @sbto4f64(<4 x double> %a) { ; GENERIC-LABEL: sbto4f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00] -; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vcmpltpd %ymm0, %ymm1, %k0 +; GENERIC-NEXT: vpmovm2d %k0, %xmm0 +; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sbto4f64: ; SKX: # %bb.0: -; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vcmpltpd %ymm0, %ymm1, %k0 +; SKX-NEXT: vpmovm2d %k0, %xmm0 +; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 +; SKX-NEXT: retq %cmpres = fcmp ogt <4 x double> %a, zeroinitializer %1 = sitofp <4 x i1> %cmpres to <4 x double> ret <4 x double> %1 @@ -2535,17 +2531,17 @@ define <2 x float> @sbto2f32(<2 x float> %a) { ; GENERIC-LABEL: sbto2f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sbto2f32: ; SKX: # %bb.0: -; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; SKX-NEXT: retq %cmpres = fcmp ogt <2 x float> %a, zeroinitializer %1 = sitofp <2 x i1> %cmpres to <2 x float> ret <2 x float> %1 @@ -2554,19 +2550,19 @@ define <2 x double> @sbto2f64(<2 x double> %a) { ; GENERIC-LABEL: sbto2f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 +; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sbto2f64: ; SKX: # %bb.0: -; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] -; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 +; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; SKX-NEXT: retq %cmpres = fcmp ogt <2 x double> %a, zeroinitializer %1 = sitofp <2 x i1> %cmpres to <2 x double> ret <2 x double> %1 @@ -2575,15 +2571,15 @@ define <16 x float> @ucto16f32(<16 x i8> %a) { ; GENERIC-LABEL: ucto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ucto16f32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 +; SKX-NEXT: retq %b = uitofp <16 x i8> %a to <16 x float> ret <16 x float>%b } @@ -2591,17 +2587,17 @@ define <8 x double> @ucto8f64(<8 x i8> %a) { ; GENERIC-LABEL: ucto8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ucto8f64: ; SKX: # %bb.0: -; SKX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 +; SKX-NEXT: retq %b = uitofp <8 x i8> %a to <8 x double> ret <8 x double> %b } @@ -2609,15 +2605,15 @@ define <16 x float> @swto16f32(<16 x i16> %a) { ; GENERIC-LABEL: swto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: swto16f32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 +; SKX-NEXT: retq %b = sitofp <16 x i16> %a to <16 x float> ret <16 x float> %b } @@ -2625,15 +2621,15 @@ define <8 x double> @swto8f64(<8 x i16> %a) { ; GENERIC-LABEL: swto8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: swto8f64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 +; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 +; SKX-NEXT: retq %b = sitofp <8 x i16> %a to <8 x double> ret <8 x double> %b } @@ -2641,19 +2637,19 @@ define <16 x double> @swto16f64(<16 x i16> %a) { ; GENERIC-LABEL: swto16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm1 +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 +; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 +; GENERIC-NEXT: retq ; ; SKX-LABEL: swto16f64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxwd %ymm0, %zmm1 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] -; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxwd %ymm0, %zmm1 +; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 +; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 +; SKX-NEXT: retq %b = sitofp <16 x i16> %a to <16 x double> ret <16 x double> %b } @@ -2661,19 +2657,19 @@ define <16 x double> @ucto16f64(<16 x i8> %a) { ; GENERIC-LABEL: ucto16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 +; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ucto16f64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] -; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 +; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 +; SKX-NEXT: retq %b = uitofp <16 x i8> %a to <16 x double> ret <16 x double> %b } @@ -2681,15 +2677,15 @@ define <16 x float> @uwto16f32(<16 x i16> %a) { ; GENERIC-LABEL: uwto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: uwto16f32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 +; SKX-NEXT: retq %b = uitofp <16 x i16> %a to <16 x float> ret <16 x float> %b } @@ -2697,15 +2693,15 @@ define <8 x double> @uwto8f64(<8 x i16> %a) { ; GENERIC-LABEL: uwto8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: uwto8f64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 +; SKX-NEXT: retq %b = uitofp <8 x i16> %a to <8 x double> ret <8 x double> %b } @@ -2713,19 +2709,19 @@ define <16 x double> @uwto16f64(<16 x i16> %a) { ; GENERIC-LABEL: uwto16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 +; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 +; GENERIC-NEXT: retq ; ; SKX-LABEL: uwto16f64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] -; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] -; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 +; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 +; SKX-NEXT: retq %b = uitofp <16 x i16> %a to <16 x double> ret <16 x double> %b } @@ -2733,13 +2729,13 @@ define <16 x float> @sito16f32(<16 x i32> %a) { ; GENERIC-LABEL: sito16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sito16f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 +; SKX-NEXT: retq %b = sitofp <16 x i32> %a to <16 x float> ret <16 x float> %b } @@ -2747,19 +2743,19 @@ define <16 x double> @sito16f64(<16 x i32> %a) { ; GENERIC-LABEL: sito16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm2 # sched: [4:1.00] -; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [4:1.00] -; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm2 +; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm1 +; GENERIC-NEXT: vmovaps %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sito16f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2pd %ymm0, %zmm2 # sched: [7:1.00] -; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [7:1.00] -; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtdq2pd %ymm0, %zmm2 +; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 +; SKX-NEXT: vcvtdq2pd %ymm0, %zmm1 +; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: retq %b = sitofp <16 x i32> %a to <16 x double> ret <16 x double> %b } @@ -2767,15 +2763,15 @@ define <16 x float> @usto16f32(<16 x i16> %a) { ; GENERIC-LABEL: usto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: usto16f32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 +; SKX-NEXT: retq %b = uitofp <16 x i16> %a to <16 x float> ret <16 x float> %b } @@ -2783,19 +2779,19 @@ define <16 x float> @ubto16f32(<16 x i32> %a) { ; GENERIC-LABEL: ubto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %zmm0, %k0 +; GENERIC-NEXT: vpmovm2d %k0, %zmm0 +; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 +; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ubto16f32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovd2m %zmm0, %k0 +; SKX-NEXT: vpmovm2d %k0, %zmm0 +; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 +; SKX-NEXT: retq %mask = icmp slt <16 x i32> %a, zeroinitializer %1 = uitofp <16 x i1> %mask to <16 x float> ret <16 x float> %1 @@ -2804,23 +2800,23 @@ define <16 x double> @ubto16f64(<16 x i32> %a) { ; GENERIC-LABEL: ubto16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %zmm0, %k0 +; GENERIC-NEXT: vpmovm2d %k0, %zmm0 +; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm1 +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 +; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ubto16f64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00] -; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] -; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovd2m %zmm0, %k0 +; SKX-NEXT: vpmovm2d %k0, %zmm0 +; SKX-NEXT: vpsrld $31, %zmm0, %zmm1 +; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 +; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 +; SKX-NEXT: retq %mask = icmp slt <16 x i32> %a, zeroinitializer %1 = uitofp <16 x i1> %mask to <16 x double> ret <16 x double> %1 @@ -2829,17 +2825,17 @@ define <8 x float> @ubto8f32(<8 x i32> %a) { ; GENERIC-LABEL: ubto8f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 +; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ubto8f32: ; SKX: # %bb.0: -; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 +; SKX-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 +; SKX-NEXT: retq %mask = icmp slt <8 x i32> %a, zeroinitializer %1 = uitofp <8 x i1> %mask to <8 x float> ret <8 x float> %1 @@ -2848,19 +2844,19 @@ define <8 x double> @ubto8f64(<8 x i32> %a) { ; GENERIC-LABEL: ubto8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 +; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 +; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ubto8f64: ; SKX: # %bb.0: -; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 +; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 +; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 +; SKX-NEXT: retq %mask = icmp slt <8 x i32> %a, zeroinitializer %1 = uitofp <8 x i1> %mask to <8 x double> ret <8 x double> %1 @@ -2869,17 +2865,17 @@ define <4 x float> @ubto4f32(<4 x i32> %a) { ; GENERIC-LABEL: ubto4f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; GENERIC-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ubto4f32: ; SKX: # %bb.0: -; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; SKX-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; SKX-NEXT: retq %mask = icmp slt <4 x i32> %a, zeroinitializer %1 = uitofp <4 x i1> %mask to <4 x float> ret <4 x float> %1 @@ -2888,19 +2884,19 @@ define <4 x double> @ubto4f64(<4 x i32> %a) { ; GENERIC-LABEL: ubto4f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 +; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ubto4f64: ; SKX: # %bb.0: -; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 +; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 +; SKX-NEXT: retq %mask = icmp slt <4 x i32> %a, zeroinitializer %1 = uitofp <4 x i1> %mask to <4 x double> ret <4 x double> %1 @@ -2909,21 +2905,21 @@ define <2 x float> @ubto2f32(<2 x i32> %a) { ; GENERIC-LABEL: ubto2f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50] -; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; GENERIC-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: ubto2f32: ; SKX: # %bb.0: -; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33] -; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 +; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SKX-NEXT: retq %mask = icmp ne <2 x i32> %a, zeroinitializer %1 = uitofp <2 x i1> %mask to <2 x float> ret <2 x float> %1 @@ -2932,23 +2928,23 @@ define <2 x double> @ubto2f64(<2 x i32> %a) { ; GENERIC-LABEL: ubto2f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50] -; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50] -; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; GENERIC-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: ubto2f64: ; SKX: # %bb.0: -; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33] -; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] -; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 +; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; SKX-NEXT: retq %mask = icmp ne <2 x i32> %a, zeroinitializer %1 = uitofp <2 x i1> %mask to <2 x double> ret <2 x double> %1 @@ -2957,17 +2953,17 @@ define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_8x8mem_to_8x16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_8x8mem_to_8x16: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k1 +; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; SKX-NEXT: retq %a = load <8 x i8>,<8 x i8> *%i,align 1 %x = zext <8 x i8> %a to <8 x i16> %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer @@ -2977,17 +2973,17 @@ define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_8x8mem_to_8x16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_8x8mem_to_8x16: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k1 +; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} +; SKX-NEXT: retq %a = load <8 x i8>,<8 x i8> *%i,align 1 %x = sext <8 x i8> %a to <8 x i16> %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer @@ -2998,17 +2994,17 @@ define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_16x8mem_to_16x16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovb2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_16x8mem_to_16x16: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 +; SKX-NEXT: vpmovb2m %xmm0, %k1 +; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero +; SKX-NEXT: retq %a = load <16 x i8>,<16 x i8> *%i,align 1 %x = zext <16 x i8> %a to <16 x i16> %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer @@ -3018,17 +3014,17 @@ define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_16x8mem_to_16x16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovb2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_16x8mem_to_16x16: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 +; SKX-NEXT: vpmovb2m %xmm0, %k1 +; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} +; SKX-NEXT: retq %a = load <16 x i8>,<16 x i8> *%i,align 1 %x = sext <16 x i8> %a to <16 x i16> %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer @@ -3038,13 +3034,13 @@ define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { ; GENERIC-LABEL: zext_16x8_to_16x16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_16x8_to_16x16: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; SKX-NEXT: retq %x = zext <16 x i8> %a to <16 x i16> ret <16 x i16> %x } @@ -3052,17 +3048,17 @@ define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_16x8_to_16x16_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 +; GENERIC-NEXT: vpmovb2m %xmm1, %k1 +; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_16x8_to_16x16_mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 +; SKX-NEXT: vpmovb2m %xmm1, %k1 +; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; SKX-NEXT: retq %x = zext <16 x i8> %a to <16 x i16> %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer ret <16 x i16> %ret @@ -3071,13 +3067,13 @@ define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { ; GENERIC-LABEL: sext_16x8_to_16x16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_16x8_to_16x16: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 +; SKX-NEXT: retq %x = sext <16 x i8> %a to <16 x i16> ret <16 x i16> %x } @@ -3085,17 +3081,17 @@ define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_16x8_to_16x16_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 +; GENERIC-NEXT: vpmovb2m %xmm1, %k1 +; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_16x8_to_16x16_mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 +; SKX-NEXT: vpmovb2m %xmm1, %k1 +; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} +; SKX-NEXT: retq %x = sext <16 x i8> %a to <16 x i16> %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer ret <16 x i16> %ret @@ -3104,17 +3100,17 @@ define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_32x8mem_to_32x16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 +; GENERIC-NEXT: vpmovb2m %ymm0, %k1 +; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_32x8mem_to_32x16: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 +; SKX-NEXT: vpmovb2m %ymm0, %k1 +; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero +; SKX-NEXT: retq %a = load <32 x i8>,<32 x i8> *%i,align 1 %x = zext <32 x i8> %a to <32 x i16> %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer @@ -3124,17 +3120,17 @@ define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_32x8mem_to_32x16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 +; GENERIC-NEXT: vpmovb2m %ymm0, %k1 +; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_32x8mem_to_32x16: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 +; SKX-NEXT: vpmovb2m %ymm0, %k1 +; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %a = load <32 x i8>,<32 x i8> *%i,align 1 %x = sext <32 x i8> %a to <32 x i16> %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer @@ -3144,13 +3140,13 @@ define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { ; GENERIC-LABEL: zext_32x8_to_32x16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_32x8_to_32x16: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; SKX-NEXT: retq %x = zext <32 x i8> %a to <32 x i16> ret <32 x i16> %x } @@ -3158,17 +3154,17 @@ define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_32x8_to_32x16_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 +; GENERIC-NEXT: vpmovb2m %ymm1, %k1 +; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_32x8_to_32x16_mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 +; SKX-NEXT: vpmovb2m %ymm1, %k1 +; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; SKX-NEXT: retq %x = zext <32 x i8> %a to <32 x i16> %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer ret <32 x i16> %ret @@ -3177,13 +3173,13 @@ define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { ; GENERIC-LABEL: sext_32x8_to_32x16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_32x8_to_32x16: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 +; SKX-NEXT: retq %x = sext <32 x i8> %a to <32 x i16> ret <32 x i16> %x } @@ -3191,17 +3187,17 @@ define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_32x8_to_32x16_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 +; GENERIC-NEXT: vpmovb2m %ymm1, %k1 +; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_32x8_to_32x16_mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 +; SKX-NEXT: vpmovb2m %ymm1, %k1 +; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq %x = sext <32 x i8> %a to <32 x i16> %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer ret <32 x i16> %ret @@ -3210,17 +3206,17 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_4x8mem_to_4x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_4x8mem_to_4x32: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: vpmovd2m %xmm0, %k1 +; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; SKX-NEXT: retq %a = load <4 x i8>,<4 x i8> *%i,align 1 %x = zext <4 x i8> %a to <4 x i32> %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer @@ -3230,17 +3226,17 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_4x8mem_to_4x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_4x8mem_to_4x32: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: vpmovd2m %xmm0, %k1 +; SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} +; SKX-NEXT: retq %a = load <4 x i8>,<4 x i8> *%i,align 1 %x = sext <4 x i8> %a to <4 x i32> %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer @@ -3250,17 +3246,17 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_8x8mem_to_8x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_8x8mem_to_8x32: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k1 +; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero +; SKX-NEXT: retq %a = load <8 x i8>,<8 x i8> *%i,align 1 %x = zext <8 x i8> %a to <8 x i32> %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer @@ -3270,17 +3266,17 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_8x8mem_to_8x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_8x8mem_to_8x32: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k1 +; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} +; SKX-NEXT: retq %a = load <8 x i8>,<8 x i8> *%i,align 1 %x = sext <8 x i8> %a to <8 x i32> %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer @@ -3290,17 +3286,17 @@ define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_16x8mem_to_16x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovb2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_16x8mem_to_16x32: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 +; SKX-NEXT: vpmovb2m %xmm0, %k1 +; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; SKX-NEXT: retq %a = load <16 x i8>,<16 x i8> *%i,align 1 %x = zext <16 x i8> %a to <16 x i32> %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer @@ -3310,17 +3306,17 @@ define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_16x8mem_to_16x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovb2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_16x8mem_to_16x32: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 +; SKX-NEXT: vpmovb2m %xmm0, %k1 +; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %a = load <16 x i8>,<16 x i8> *%i,align 1 %x = sext <16 x i8> %a to <16 x i32> %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer @@ -3330,17 +3326,17 @@ define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_16x8_to_16x32_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 +; GENERIC-NEXT: vpmovb2m %xmm1, %k1 +; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_16x8_to_16x32_mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 +; SKX-NEXT: vpmovb2m %xmm1, %k1 +; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; SKX-NEXT: retq %x = zext <16 x i8> %a to <16 x i32> %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer ret <16 x i32> %ret @@ -3349,17 +3345,17 @@ define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_16x8_to_16x32_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 +; GENERIC-NEXT: vpmovb2m %xmm1, %k1 +; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_16x8_to_16x32_mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 +; SKX-NEXT: vpmovb2m %xmm1, %k1 +; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq %x = sext <16 x i8> %a to <16 x i32> %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer ret <16 x i32> %ret @@ -3368,13 +3364,13 @@ define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { ; GENERIC-LABEL: zext_16x8_to_16x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_16x8_to_16x32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; SKX-NEXT: retq %x = zext <16 x i8> %i to <16 x i32> ret <16 x i32> %x } @@ -3382,13 +3378,13 @@ define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { ; GENERIC-LABEL: sext_16x8_to_16x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_16x8_to_16x32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 +; SKX-NEXT: retq %x = sext <16 x i8> %i to <16 x i32> ret <16 x i32> %x } @@ -3396,17 +3392,17 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_2x8mem_to_2x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovq2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_2x8mem_to_2x64: ; SKX: # %bb.0: -; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 +; SKX-NEXT: vpmovq2m %xmm0, %k1 +; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; SKX-NEXT: retq %a = load <2 x i8>,<2 x i8> *%i,align 1 %x = zext <2 x i8> %a to <2 x i64> %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer @@ -3415,17 +3411,17 @@ define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_2x8mem_to_2x64mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovq2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_2x8mem_to_2x64mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 +; SKX-NEXT: vpmovq2m %xmm0, %k1 +; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} +; SKX-NEXT: retq %a = load <2 x i8>,<2 x i8> *%i,align 1 %x = sext <2 x i8> %a to <2 x i64> %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer @@ -3434,13 +3430,13 @@ define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone { ; GENERIC-LABEL: sext_2x8mem_to_2x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_2x8mem_to_2x64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 # sched: [6:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 +; SKX-NEXT: retq %a = load <2 x i8>,<2 x i8> *%i,align 1 %x = sext <2 x i8> %a to <2 x i64> ret <2 x i64> %x @@ -3449,17 +3445,17 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_4x8mem_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_4x8mem_to_4x64: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: vpmovd2m %xmm0, %k1 +; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero +; SKX-NEXT: retq %a = load <4 x i8>,<4 x i8> *%i,align 1 %x = zext <4 x i8> %a to <4 x i64> %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer @@ -3469,17 +3465,17 @@ define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_4x8mem_to_4x64mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_4x8mem_to_4x64mask: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: vpmovd2m %xmm0, %k1 +; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} +; SKX-NEXT: retq %a = load <4 x i8>,<4 x i8> *%i,align 1 %x = sext <4 x i8> %a to <4 x i64> %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer @@ -3489,13 +3485,13 @@ define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone { ; GENERIC-LABEL: sext_4x8mem_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_4x8mem_to_4x64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 +; SKX-NEXT: retq %a = load <4 x i8>,<4 x i8> *%i,align 1 %x = sext <4 x i8> %a to <4 x i64> ret <4 x i64> %x @@ -3504,17 +3500,17 @@ define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_8x8mem_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_8x8mem_to_8x64: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k1 +; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero +; SKX-NEXT: retq %a = load <8 x i8>,<8 x i8> *%i,align 1 %x = zext <8 x i8> %a to <8 x i64> %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer @@ -3524,17 +3520,17 @@ define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_8x8mem_to_8x64mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_8x8mem_to_8x64mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k1 +; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %a = load <8 x i8>,<8 x i8> *%i,align 1 %x = sext <8 x i8> %a to <8 x i64> %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer @@ -3544,13 +3540,13 @@ define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone { ; GENERIC-LABEL: sext_8x8mem_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_8x8mem_to_8x64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 +; SKX-NEXT: retq %a = load <8 x i8>,<8 x i8> *%i,align 1 %x = sext <8 x i8> %a to <8 x i64> ret <8 x i64> %x @@ -3559,17 +3555,17 @@ define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_4x16mem_to_4x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_4x16mem_to_4x32: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: vpmovd2m %xmm0, %k1 +; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SKX-NEXT: retq %a = load <4 x i16>,<4 x i16> *%i,align 1 %x = zext <4 x i16> %a to <4 x i32> %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer @@ -3579,17 +3575,17 @@ define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_4x16mem_to_4x32mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_4x16mem_to_4x32mask: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: vpmovd2m %xmm0, %k1 +; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} +; SKX-NEXT: retq %a = load <4 x i16>,<4 x i16> *%i,align 1 %x = sext <4 x i16> %a to <4 x i32> %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer @@ -3599,13 +3595,13 @@ define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_4x16mem_to_4x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_4x16mem_to_4x32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 # sched: [6:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 +; SKX-NEXT: retq %a = load <4 x i16>,<4 x i16> *%i,align 1 %x = sext <4 x i16> %a to <4 x i32> ret <4 x i32> %x @@ -3615,17 +3611,17 @@ define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_8x16mem_to_8x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_8x16mem_to_8x32: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k1 +; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; SKX-NEXT: retq %a = load <8 x i16>,<8 x i16> *%i,align 1 %x = zext <8 x i16> %a to <8 x i32> %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer @@ -3635,17 +3631,17 @@ define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_8x16mem_to_8x32mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_8x16mem_to_8x32mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k1 +; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} +; SKX-NEXT: retq %a = load <8 x i16>,<8 x i16> *%i,align 1 %x = sext <8 x i16> %a to <8 x i32> %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer @@ -3655,13 +3651,13 @@ define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_8x16mem_to_8x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_8x16mem_to_8x32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 +; SKX-NEXT: retq %a = load <8 x i16>,<8 x i16> *%i,align 1 %x = sext <8 x i16> %a to <8 x i32> ret <8 x i32> %x @@ -3670,17 +3666,17 @@ define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_8x16_to_8x32mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 +; GENERIC-NEXT: vpmovw2m %xmm1, %k1 +; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_8x16_to_8x32mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 +; SKX-NEXT: vpmovw2m %xmm1, %k1 +; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SKX-NEXT: retq %x = zext <8 x i16> %a to <8 x i32> %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer ret <8 x i32> %ret @@ -3689,13 +3685,13 @@ define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone { ; GENERIC-LABEL: zext_8x16_to_8x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_8x16_to_8x32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SKX-NEXT: retq %x = zext <8 x i16> %a to <8 x i32> ret <8 x i32> %x } @@ -3703,17 +3699,17 @@ define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_16x16mem_to_16x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovb2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_16x16mem_to_16x32: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 +; SKX-NEXT: vpmovb2m %xmm0, %k1 +; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero +; SKX-NEXT: retq %a = load <16 x i16>,<16 x i16> *%i,align 1 %x = zext <16 x i16> %a to <16 x i32> %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer @@ -3723,17 +3719,17 @@ define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_16x16mem_to_16x32mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovb2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_16x16mem_to_16x32mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 +; SKX-NEXT: vpmovb2m %xmm0, %k1 +; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %a = load <16 x i16>,<16 x i16> *%i,align 1 %x = sext <16 x i16> %a to <16 x i32> %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer @@ -3743,13 +3739,13 @@ define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_16x16mem_to_16x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_16x16mem_to_16x32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 +; SKX-NEXT: retq %a = load <16 x i16>,<16 x i16> *%i,align 1 %x = sext <16 x i16> %a to <16 x i32> ret <16 x i32> %x @@ -3757,17 +3753,17 @@ define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_16x16_to_16x32mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 +; GENERIC-NEXT: vpmovb2m %xmm1, %k1 +; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_16x16_to_16x32mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 +; SKX-NEXT: vpmovb2m %xmm1, %k1 +; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; SKX-NEXT: retq %x = zext <16 x i16> %a to <16 x i32> %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer ret <16 x i32> %ret @@ -3776,13 +3772,13 @@ define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone { ; GENERIC-LABEL: zext_16x16_to_16x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_16x16_to_16x32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; SKX-NEXT: retq %x = zext <16 x i16> %a to <16 x i32> ret <16 x i32> %x } @@ -3790,17 +3786,17 @@ define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_2x16mem_to_2x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovq2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_2x16mem_to_2x64: ; SKX: # %bb.0: -; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 +; SKX-NEXT: vpmovq2m %xmm0, %k1 +; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero +; SKX-NEXT: retq %a = load <2 x i16>,<2 x i16> *%i,align 1 %x = zext <2 x i16> %a to <2 x i64> %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer @@ -3810,17 +3806,17 @@ define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_2x16mem_to_2x64mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovq2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_2x16mem_to_2x64mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 +; SKX-NEXT: vpmovq2m %xmm0, %k1 +; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} +; SKX-NEXT: retq %a = load <2 x i16>,<2 x i16> *%i,align 1 %x = sext <2 x i16> %a to <2 x i64> %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer @@ -3830,13 +3826,13 @@ define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_2x16mem_to_2x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_2x16mem_to_2x64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 # sched: [6:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 +; SKX-NEXT: retq %a = load <2 x i16>,<2 x i16> *%i,align 1 %x = sext <2 x i16> %a to <2 x i64> ret <2 x i64> %x @@ -3845,17 +3841,17 @@ define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_4x16mem_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_4x16mem_to_4x64: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: vpmovd2m %xmm0, %k1 +; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; SKX-NEXT: retq %a = load <4 x i16>,<4 x i16> *%i,align 1 %x = zext <4 x i16> %a to <4 x i64> %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer @@ -3865,17 +3861,17 @@ define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_4x16mem_to_4x64mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_4x16mem_to_4x64mask: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: vpmovd2m %xmm0, %k1 +; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} +; SKX-NEXT: retq %a = load <4 x i16>,<4 x i16> *%i,align 1 %x = sext <4 x i16> %a to <4 x i64> %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer @@ -3885,13 +3881,13 @@ define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_4x16mem_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_4x16mem_to_4x64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 +; SKX-NEXT: retq %a = load <4 x i16>,<4 x i16> *%i,align 1 %x = sext <4 x i16> %a to <4 x i64> ret <4 x i64> %x @@ -3900,17 +3896,17 @@ define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_8x16mem_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_8x16mem_to_8x64: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k1 +; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero +; SKX-NEXT: retq %a = load <8 x i16>,<8 x i16> *%i,align 1 %x = zext <8 x i16> %a to <8 x i64> %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer @@ -3920,17 +3916,17 @@ define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_8x16mem_to_8x64mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_8x16mem_to_8x64mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k1 +; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %a = load <8 x i16>,<8 x i16> *%i,align 1 %x = sext <8 x i16> %a to <8 x i64> %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer @@ -3940,13 +3936,13 @@ define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_8x16mem_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_8x16mem_to_8x64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 +; SKX-NEXT: retq %a = load <8 x i16>,<8 x i16> *%i,align 1 %x = sext <8 x i16> %a to <8 x i64> ret <8 x i64> %x @@ -3955,17 +3951,17 @@ define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_8x16_to_8x64mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 +; GENERIC-NEXT: vpmovw2m %xmm1, %k1 +; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_8x16_to_8x64mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 +; SKX-NEXT: vpmovw2m %xmm1, %k1 +; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; SKX-NEXT: retq %x = zext <8 x i16> %a to <8 x i64> %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer ret <8 x i64> %ret @@ -3974,13 +3970,13 @@ define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone { ; GENERIC-LABEL: zext_8x16_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_8x16_to_8x64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; SKX-NEXT: retq %ret = zext <8 x i16> %a to <8 x i64> ret <8 x i64> %ret } @@ -3988,17 +3984,17 @@ define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_2x32mem_to_2x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovq2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_2x32mem_to_2x64: ; SKX: # %bb.0: -; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 +; SKX-NEXT: vpmovq2m %xmm0, %k1 +; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero +; SKX-NEXT: retq %a = load <2 x i32>,<2 x i32> *%i,align 1 %x = zext <2 x i32> %a to <2 x i64> %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer @@ -4008,17 +4004,17 @@ define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_2x32mem_to_2x64mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovq2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_2x32mem_to_2x64mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 +; SKX-NEXT: vpmovq2m %xmm0, %k1 +; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} +; SKX-NEXT: retq %a = load <2 x i32>,<2 x i32> *%i,align 1 %x = sext <2 x i32> %a to <2 x i64> %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer @@ -4028,13 +4024,13 @@ define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone { ; GENERIC-LABEL: sext_2x32mem_to_2x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_2x32mem_to_2x64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 # sched: [6:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 +; SKX-NEXT: retq %a = load <2 x i32>,<2 x i32> *%i,align 1 %x = sext <2 x i32> %a to <2 x i64> ret <2 x i64> %x @@ -4043,17 +4039,17 @@ define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_4x32mem_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_4x32mem_to_4x64: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: vpmovd2m %xmm0, %k1 +; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SKX-NEXT: retq %a = load <4 x i32>,<4 x i32> *%i,align 1 %x = zext <4 x i32> %a to <4 x i64> %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer @@ -4063,17 +4059,17 @@ define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_4x32mem_to_4x64mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_4x32mem_to_4x64mask: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: vpmovd2m %xmm0, %k1 +; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} +; SKX-NEXT: retq %a = load <4 x i32>,<4 x i32> *%i,align 1 %x = sext <4 x i32> %a to <4 x i64> %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer @@ -4083,13 +4079,13 @@ define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone { ; GENERIC-LABEL: sext_4x32mem_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_4x32mem_to_4x64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 +; SKX-NEXT: retq %a = load <4 x i32>,<4 x i32> *%i,align 1 %x = sext <4 x i32> %a to <4 x i64> ret <4 x i64> %x @@ -4098,13 +4094,13 @@ define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone { ; GENERIC-LABEL: sext_4x32_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_4x32_to_4x64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 +; SKX-NEXT: retq %x = sext <4 x i32> %a to <4 x i64> ret <4 x i64> %x } @@ -4112,17 +4108,17 @@ define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_4x32_to_4x64mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 +; GENERIC-NEXT: vpmovd2m %xmm1, %k1 +; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_4x32_to_4x64mask: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpslld $31, %xmm1, %xmm1 +; SKX-NEXT: vpmovd2m %xmm1, %k1 +; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; SKX-NEXT: retq %x = zext <4 x i32> %a to <4 x i64> %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer ret <4 x i64> %ret @@ -4131,17 +4127,17 @@ define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_8x32mem_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k1 +; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_8x32mem_to_8x64: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k1 +; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; SKX-NEXT: retq %a = load <8 x i32>,<8 x i32> *%i,align 1 %x = zext <8 x i32> %a to <8 x i64> %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer @@ -4151,17 +4147,17 @@ define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: sext_8x32mem_to_8x64mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k1 +; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_8x32mem_to_8x64mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k1 +; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %a = load <8 x i32>,<8 x i32> *%i,align 1 %x = sext <8 x i32> %a to <8 x i64> %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer @@ -4171,13 +4167,13 @@ define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone { ; GENERIC-LABEL: sext_8x32mem_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_8x32mem_to_8x64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 +; SKX-NEXT: retq %a = load <8 x i32>,<8 x i32> *%i,align 1 %x = sext <8 x i32> %a to <8 x i64> ret <8 x i64> %x @@ -4186,13 +4182,13 @@ define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone { ; GENERIC-LABEL: sext_8x32_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxdq %ymm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxdq %ymm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_8x32_to_8x64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxdq %ymm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxdq %ymm0, %zmm0 +; SKX-NEXT: retq %x = sext <8 x i32> %a to <8 x i64> ret <8 x i64> %x } @@ -4200,17 +4196,17 @@ define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: zext_8x32_to_8x64mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 +; GENERIC-NEXT: vpmovw2m %xmm1, %k1 +; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_8x32_to_8x64mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 +; SKX-NEXT: vpmovw2m %xmm1, %k1 +; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; SKX-NEXT: retq %x = zext <8 x i32> %a to <8 x i64> %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer ret <8 x i64> %ret @@ -4218,13 +4214,13 @@ define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone { ; GENERIC-LABEL: fptrunc_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: fptrunc_test: ; SKX: # %bb.0: -; SKX-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtpd2ps %zmm0, %ymm0 +; SKX-NEXT: retq %b = fptrunc <8 x double> %a to <8 x float> ret <8 x float> %b } @@ -4232,13 +4228,13 @@ define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone { ; GENERIC-LABEL: fpext_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: fpext_test: ; SKX: # %bb.0: -; SKX-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vcvtps2pd %ymm0, %zmm0 +; SKX-NEXT: retq %b = fpext <8 x float> %a to <8 x double> ret <8 x double> %b } @@ -4246,17 +4242,17 @@ define <16 x i32> @zext_16i1_to_16xi32(i16 %b) { ; GENERIC-LABEL: zext_16i1_to_16xi32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k0 +; GENERIC-NEXT: vpmovm2d %k0, %zmm0 +; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_16i1_to_16xi32: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: vpmovm2d %k0, %zmm0 +; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 +; SKX-NEXT: retq %a = bitcast i16 %b to <16 x i1> %c = zext <16 x i1> %a to <16 x i32> ret <16 x i32> %c @@ -4265,17 +4261,17 @@ define <8 x i64> @zext_8i1_to_8xi64(i8 %b) { ; GENERIC-LABEL: zext_8i1_to_8xi64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k0 +; GENERIC-NEXT: vpmovm2q %k0, %zmm0 +; GENERIC-NEXT: vpsrlq $63, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_8i1_to_8xi64: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: vpmovm2q %k0, %zmm0 +; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 +; SKX-NEXT: retq %a = bitcast i8 %b to <8 x i1> %c = zext <8 x i1> %a to <8 x i64> ret <8 x i64> %c @@ -4284,17 +4280,17 @@ define i16 @trunc_16i8_to_16i1(<16 x i8> %a) { ; GENERIC-LABEL: trunc_16i8_to_16i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovmskb %xmm0, %eax ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: trunc_16i8_to_16i1: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] +; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 +; SKX-NEXT: vpmovmskb %xmm0, %eax ; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %mask_b = trunc <16 x i8>%a to <16 x i1> %mask = bitcast <16 x i1> %mask_b to i16 ret i16 %mask @@ -4303,21 +4299,21 @@ define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { ; GENERIC-LABEL: trunc_16i32_to_16i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] +; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0 +; GENERIC-NEXT: vpmovd2m %zmm0, %k0 +; GENERIC-NEXT: kmovd %k0, %eax ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: trunc_16i32_to_16i1: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:1.00] -; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] +; SKX-NEXT: vpslld $31, %zmm0, %zmm0 +; SKX-NEXT: vpmovd2m %zmm0, %k0 +; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %mask_b = trunc <16 x i32>%a to <16 x i1> %mask = bitcast <16 x i1> %mask_b to i16 ret i16 %mask @@ -4326,17 +4322,17 @@ define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) { ; GENERIC-LABEL: trunc_4i32_to_4i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpsrad $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpand %xmm1, %xmm0, %xmm0 +; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 +; GENERIC-NEXT: vpsrad $31, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: trunc_4i32_to_4i1: ; SKX: # %bb.0: -; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsrad $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: vpsrad $31, %xmm0, %xmm0 +; SKX-NEXT: retq %mask_a = trunc <4 x i32>%a to <4 x i1> %mask_b = trunc <4 x i32>%b to <4 x i1> %a_and_b = and <4 x i1>%mask_a, %mask_b @@ -4348,19 +4344,19 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { ; GENERIC-LABEL: trunc_8i16_to_8i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k0 +; GENERIC-NEXT: kmovd %k0, %eax ; GENERIC-NEXT: # kill: def $al killed $al killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: trunc_8i16_to_8i1: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k0 +; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: # kill: def $al killed $al killed $eax -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %mask_b = trunc <8 x i16>%a to <8 x i1> %mask = bitcast <8 x i1> %mask_b to i8 ret i8 %mask @@ -4369,15 +4365,15 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { ; GENERIC-LABEL: sext_8i1_8i32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 +; GENERIC-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_8i1_8i32: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 +; SKX-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 +; SKX-NEXT: retq %x = icmp slt <8 x i32> %a1, %a2 %x1 = xor <8 x i1>%x, %y = sext <8 x i1> %x1 to <8 x i32> @@ -4388,29 +4384,29 @@ define i16 @trunc_i32_to_i1(i32 %a) { ; GENERIC-LABEL: trunc_i32_to_i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movw $-4, %ax # sched: [1:0.33] -; GENERIC-NEXT: kmovd %eax, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kshiftrw $1, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: kshiftlw $1, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33] -; GENERIC-NEXT: kmovw %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: korw %k1, %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] +; GENERIC-NEXT: movw $-4, %ax +; GENERIC-NEXT: kmovd %eax, %k0 +; GENERIC-NEXT: kshiftrw $1, %k0, %k0 +; GENERIC-NEXT: kshiftlw $1, %k0, %k0 +; GENERIC-NEXT: andl $1, %edi +; GENERIC-NEXT: kmovw %edi, %k1 +; GENERIC-NEXT: korw %k1, %k0, %k0 +; GENERIC-NEXT: kmovd %k0, %eax ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: trunc_i32_to_i1: ; SKX: # %bb.0: -; SKX-NEXT: movw $-4, %ax # sched: [1:0.25] -; SKX-NEXT: kmovd %eax, %k0 # sched: [1:1.00] -; SKX-NEXT: kshiftrw $1, %k0, %k0 # sched: [3:1.00] -; SKX-NEXT: kshiftlw $1, %k0, %k0 # sched: [3:1.00] -; SKX-NEXT: andl $1, %edi # sched: [1:0.25] -; SKX-NEXT: kmovw %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] +; SKX-NEXT: movw $-4, %ax +; SKX-NEXT: kmovd %eax, %k0 +; SKX-NEXT: kshiftrw $1, %k0, %k0 +; SKX-NEXT: kshiftlw $1, %k0, %k0 +; SKX-NEXT: andl $1, %edi +; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %a_i = trunc i32 %a to i1 %maskv = insertelement <16 x i1> , i1 %a_i, i32 0 %res = bitcast <16 x i1> %maskv to i16 @@ -4420,17 +4416,17 @@ define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind { ; GENERIC-LABEL: sext_8i1_8i16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50] -; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 +; GENERIC-NEXT: vpmovm2w %k0, %xmm0 +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_8i1_8i16: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 +; SKX-NEXT: vpmovm2w %k0, %xmm0 +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %x = icmp slt <8 x i32> %a1, %a2 %y = sext <8 x i1> %x to <8 x i16> ret <8 x i16> %y @@ -4439,15 +4435,15 @@ define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind { ; GENERIC-LABEL: sext_16i1_16i32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [1:0.50] -; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 +; GENERIC-NEXT: vpmovm2d %k0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_16i1_16i32: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 +; SKX-NEXT: vpmovm2d %k0, %zmm0 +; SKX-NEXT: retq %x = icmp slt <16 x i32> %a1, %a2 %y = sext <16 x i1> %x to <16 x i32> ret <16 x i32> %y @@ -4456,15 +4452,15 @@ define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { ; GENERIC-LABEL: sext_8i1_8i64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50] -; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 +; GENERIC-NEXT: vpmovm2q %k0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: sext_8i1_8i64: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 +; SKX-NEXT: vpmovm2q %k0, %zmm0 +; SKX-NEXT: retq %x = icmp slt <8 x i32> %a1, %a2 %y = sext <8 x i1> %x to <8 x i64> ret <8 x i64> %y @@ -4473,17 +4469,17 @@ define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) { ; GENERIC-LABEL: extload_v8i64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 +; GENERIC-NEXT: vmovdqa64 %zmm0, (%rsi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: extload_v8i64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00] -; SKX-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 +; SKX-NEXT: vmovdqa64 %zmm0, (%rsi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %sign_load = load <8 x i8>, <8 x i8>* %a %c = sext <8 x i8> %sign_load to <8 x i64> store <8 x i64> %c, <8 x i64>* %res @@ -4493,21 +4489,21 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: test21: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] -; GENERIC-NEXT: kshiftrq $32, %k1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %zmm2, %zmm2 +; GENERIC-NEXT: vpmovb2m %zmm2, %k1 +; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: kshiftrq $32, %k1, %k1 +; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test21: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00] -; SKX-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:1.00] -; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] -; SKX-NEXT: kshiftrq $32, %k1, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 +; SKX-NEXT: vpmovb2m %zmm2, %k1 +; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: kshiftrq $32, %k1, %k1 +; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} +; SKX-NEXT: retq %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer ret <64 x i16> %ret } @@ -4515,13 +4511,13 @@ define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone { ; GENERIC-LABEL: shuffle_zext_16x8_to_16x16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: shuffle_zext_16x8_to_16x16: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; SKX-NEXT: retq %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> %2 = bitcast <32 x i8> %1 to <16 x i16> ret <16 x i16> %2 @@ -4530,17 +4526,17 @@ define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: shuffle_zext_16x8_to_16x16_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 +; GENERIC-NEXT: vpmovb2m %xmm1, %k1 +; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] -; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 +; SKX-NEXT: vpmovb2m %xmm1, %k1 +; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; SKX-NEXT: retq %x = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> %bc = bitcast <32 x i8> %x to <16 x i16> %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer @@ -4550,13 +4546,13 @@ define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) { ; GENERIC-LABEL: zext_32x8_to_16x16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_32x8_to_16x16: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; SKX-NEXT: retq %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> %2 = bitcast <32 x i8> %1 to <16 x i16> ret <16 x i16> %2 @@ -4565,13 +4561,13 @@ define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) { ; GENERIC-LABEL: zext_32x8_to_8x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_32x8_to_8x32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; SKX-NEXT: retq %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> %2 = bitcast <32 x i8> %1 to <8 x i32> ret <8 x i32> %2 @@ -4580,13 +4576,13 @@ define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) { ; GENERIC-LABEL: zext_32x8_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_32x8_to_4x64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero +; SKX-NEXT: retq %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> %2 = bitcast <32 x i8> %1 to <4 x i64> ret <4 x i64> %2 @@ -4595,13 +4591,13 @@ define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) { ; GENERIC-LABEL: zext_16x16_to_8x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_16x16_to_8x32: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SKX-NEXT: retq %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> %2 = bitcast <16 x i16> %1 to <8 x i32> ret <8 x i32> %2 @@ -4610,13 +4606,13 @@ define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) { ; GENERIC-LABEL: zext_16x16_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_16x16_to_4x64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; SKX-NEXT: retq %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> %2 = bitcast <16 x i16> %1 to <4 x i64> ret <4 x i64> %2 @@ -4625,13 +4621,13 @@ define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) { ; GENERIC-LABEL: zext_8x32_to_4x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_8x32_to_4x64: ; SKX: # %bb.0: -; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; SKX-NEXT: retq %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> %2 = bitcast <8 x i32> %1 to <4 x i64> ret <4 x i64> %2 @@ -4640,15 +4636,15 @@ define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 { ; GENERIC-LABEL: zext_64xi1_to_64xi8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [1:0.50] -; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 +; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_64xi1_to_64xi8: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 +; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = icmp eq <64 x i8> %x, %y %1 = zext <64 x i1> %mask to <64 x i8> ret <64 x i8> %1 @@ -4657,17 +4653,17 @@ define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 { ; GENERIC-LABEL: zext_32xi1_to_32xi16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [1:0.50] -; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 +; GENERIC-NEXT: vpmovm2w %k0, %zmm0 +; GENERIC-NEXT: vpsrlw $15, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_32xi1_to_32xi16: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 +; SKX-NEXT: vpmovm2w %k0, %zmm0 +; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 +; SKX-NEXT: retq %mask = icmp eq <32 x i16> %x, %y %1 = zext <32 x i1> %mask to <32 x i16> ret <32 x i16> %1 @@ -4676,15 +4672,15 @@ define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 { ; GENERIC-LABEL: zext_16xi1_to_16xi16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; GENERIC-NEXT: vpsrlw $15, %ymm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_16xi1_to_16xi16: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; SKX-NEXT: vpsrlw $15, %ymm0, %ymm0 +; SKX-NEXT: retq %mask = icmp eq <16 x i16> %x, %y %1 = zext <16 x i1> %mask to <16 x i16> ret <16 x i16> %1 @@ -4694,15 +4690,15 @@ define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 { ; GENERIC-LABEL: zext_32xi1_to_32xi8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [1:0.50] -; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 +; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_32xi1_to_32xi8: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 +; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} +; SKX-NEXT: retq %mask = icmp eq <32 x i16> %x, %y %1 = zext <32 x i1> %mask to <32 x i8> ret <32 x i8> %1 @@ -4711,21 +4707,21 @@ define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 { ; GENERIC-LABEL: zext_4xi1_to_4x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,255] sched: [7:0.50] -; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] -; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,255] +; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 +; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 +; GENERIC-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_4xi1_to_4x32: ; SKX: # %bb.0: -; SKX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,255] sched: [6:0.50] -; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,255] +; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 +; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 +; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 +; SKX-NEXT: retq %mask = icmp eq <4 x i8> %x, %y %1 = zext <4 x i1> %mask to <4 x i32> ret <4 x i32> %1 @@ -4734,21 +4730,21 @@ define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 { ; GENERIC-LABEL: zext_2xi1_to_2xi64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastq {{.*#+}} xmm2 = [255,255] sched: [7:0.50] -; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] -; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpbroadcastq {{.*#+}} xmm2 = [255,255] +; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 +; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 +; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; GENERIC-NEXT: vpsrlq $63, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_2xi1_to_2xi64: ; SKX: # %bb.0: -; SKX-NEXT: vpbroadcastq {{.*#+}} xmm2 = [255,255] sched: [6:0.50] -; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpbroadcastq {{.*#+}} xmm2 = [255,255] +; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 +; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 +; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 +; SKX-NEXT: retq %mask = icmp eq <2 x i8> %x, %y %1 = zext <2 x i1> %mask to <2 x i64> ret <2 x i64> %1 @@ -4757,15 +4753,15 @@ define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; GENERIC-LABEL: test_x86_fmadd_ps_z: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: vaddps %zmm2, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_x86_fmadd_ps_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: vaddps %zmm2, %zmm0, %zmm0 +; SKX-NEXT: retq %x = fmul <16 x float> %a0, %a1 %res = fadd <16 x float> %x, %a2 ret <16 x float> %res @@ -4774,15 +4770,15 @@ define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; GENERIC-LABEL: test_x86_fmsub_ps_z: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_x86_fmsub_ps_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 +; SKX-NEXT: retq %x = fmul <16 x float> %a0, %a1 %res = fsub <16 x float> %x, %a2 ret <16 x float> %res @@ -4791,15 +4787,15 @@ define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; GENERIC-LABEL: test_x86_fnmadd_ps_z: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: vsubps %zmm0, %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_x86_fnmadd_ps_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: vsubps %zmm0, %zmm2, %zmm0 +; SKX-NEXT: retq %x = fmul <16 x float> %a0, %a1 %res = fsub <16 x float> %a2, %x ret <16 x float> %res @@ -4808,17 +4804,17 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; GENERIC-LABEL: test_x86_fnmsub_ps_z: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_x86_fnmsub_ps_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 +; SKX-NEXT: retq %x = fmul <16 x float> %a0, %a1 %y = fsub <16 x float> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; GENERIC-LABEL: test_x86_fmadd_pd_z: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: vaddpd %zmm2, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_x86_fmadd_pd_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 +; SKX-NEXT: vaddpd %zmm2, %zmm0, %zmm0 +; SKX-NEXT: retq %x = fmul <8 x double> %a0, %a1 %res = fadd <8 x double> %x, %a2 ret <8 x double> %res @@ -4848,15 +4844,15 @@ define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; GENERIC-LABEL: test_x86_fmsub_pd_z: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: vsubpd %zmm2, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_x86_fmsub_pd_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 +; SKX-NEXT: vsubpd %zmm2, %zmm0, %zmm0 +; SKX-NEXT: retq %x = fmul <8 x double> %a0, %a1 %res = fsub <8 x double> %x, %a2 ret <8 x double> %res @@ -4865,15 +4861,15 @@ define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) { ; GENERIC-LABEL: test_x86_fmsub_213: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmulsd %xmm1, %xmm0, %xmm0 +; GENERIC-NEXT: vsubsd %xmm2, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_x86_fmsub_213: ; SKX: # %bb.0: -; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 +; SKX-NEXT: vsubsd %xmm2, %xmm0, %xmm0 +; SKX-NEXT: retq %x = fmul double %a0, %a1 %res = fsub double %x, %a2 ret double %res @@ -4882,15 +4878,15 @@ define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) { ; GENERIC-LABEL: test_x86_fmsub_213_m: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmulsd %xmm1, %xmm0, %xmm0 +; GENERIC-NEXT: vsubsd (%rdi), %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_x86_fmsub_213_m: ; SKX: # %bb.0: -; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 +; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 +; SKX-NEXT: retq %a2 = load double , double *%a2_ptr %x = fmul double %a0, %a1 %res = fsub double %x, %a2 @@ -4900,15 +4896,15 @@ define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) { ; GENERIC-LABEL: test_x86_fmsub_231_m: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmulsd (%rdi), %xmm0, %xmm0 +; GENERIC-NEXT: vsubsd %xmm1, %xmm0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_x86_fmsub_231_m: ; SKX: # %bb.0: -; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 +; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 +; SKX-NEXT: retq %a2 = load double , double *%a2_ptr %x = fmul double %a0, %a2 %res = fsub double %x, %a1 @@ -4918,15 +4914,15 @@ define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; GENERIC-LABEL: test231_br: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [12:1.00] -; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test231_br: ; SKX: # %bb.0: -; SKX-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq %b1 = fmul <16 x float> %a1, %b2 = fadd <16 x float> %b1, %a2 ret <16 x float> %b2 @@ -4935,15 +4931,15 @@ define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; GENERIC-LABEL: test213_br: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test213_br: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] -; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; SKX-NEXT: retq %b1 = fmul <16 x float> %a1, %a2 %b2 = fadd <16 x float> %b1, ret <16 x float> %b2 @@ -4953,19 +4949,19 @@ define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { ; GENERIC-LABEL: test_x86_fmadd132_ps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [12:1.00] -; GENERIC-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 +; GENERIC-NEXT: vpmovb2m %xmm2, %k1 +; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm2 +; GENERIC-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_x86_fmadd132_ps: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] -; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [11:0.50] -; SKX-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 +; SKX-NEXT: vpmovb2m %xmm2, %k1 +; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm2 +; SKX-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 %x = fmul <16 x float> %a0, %a2 %y = fadd <16 x float> %x, %a1 @@ -4977,21 +4973,21 @@ define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { ; GENERIC-LABEL: test_x86_fmadd231_ps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [12:1.00] -; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [3:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 +; GENERIC-NEXT: vpmovb2m %xmm2, %k1 +; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_x86_fmadd231_ps: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] -; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [4:0.50] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 +; SKX-NEXT: vpmovb2m %xmm2, %k1 +; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm0 +; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 %x = fmul <16 x float> %a0, %a2 %y = fadd <16 x float> %x, %a1 @@ -5003,21 +4999,21 @@ define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { ; GENERIC-LABEL: test_x86_fmadd213_ps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [10:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 +; GENERIC-NEXT: vpmovb2m %xmm2, %k1 +; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_x86_fmadd213_ps: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] -; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] -; SKX-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [11:0.50] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 +; SKX-NEXT: vpmovb2m %xmm2, %k1 +; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 +; SKX-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 %x = fmul <16 x float> %a1, %a0 %y = fadd <16 x float> %x, %a2 @@ -5028,15 +5024,15 @@ define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpandd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpandd %zmm1, %zmm0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; GENERIC-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpandd: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpandd %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; SKX-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq entry: ; Force the execution domain with an add. %a2 = add <16 x i32> %a, @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpandnd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpandnd %zmm0, %zmm1, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; GENERIC-NEXT: vpandnd %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpandnd: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpandnd %zmm0, %zmm1, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; SKX-NEXT: vpandnd %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq entry: ; Force the execution domain with an add. %a2 = add <16 x i32> %a, @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpord: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpord %zmm1, %zmm0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; GENERIC-NEXT: vpord %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpord: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpord %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; SKX-NEXT: vpord %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq entry: ; Force the execution domain with an add. %a2 = add <16 x i32> %a, @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpxord: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpxord %zmm1, %zmm0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; GENERIC-NEXT: vpxord %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpxord: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpxord %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; SKX-NEXT: vpxord %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq entry: ; Force the execution domain with an add. %a2 = add <16 x i32> %a, @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpandq: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpandq: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq entry: ; Force the execution domain with an add. %a2 = add <8 x i64> %a, @@ -5129,15 +5125,15 @@ define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpandnq: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpandnq: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq entry: ; Force the execution domain with an add. %a2 = add <8 x i64> %a, @@ -5149,15 +5145,15 @@ define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vporq: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vporq: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq entry: ; Force the execution domain with an add. %a2 = add <8 x i64> %a, @@ -5168,15 +5164,15 @@ define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; GENERIC-LABEL: vpxorq: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpxorq: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq entry: ; Force the execution domain with an add. %a2 = add <8 x i64> %a, @@ -5187,13 +5183,13 @@ define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) { ; GENERIC-LABEL: and_v64i8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: and_v64i8: ; SKX: # %bb.0: -; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq %res = and <64 x i8> %a, %b ret <64 x i8> %res } @@ -5201,13 +5197,13 @@ define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) { ; GENERIC-LABEL: andn_v64i8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: andn_v64i8: ; SKX: # %bb.0: -; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq %b2 = xor <64 x i8> %b, @or_v64i8(<64 x i8> %a, <64 x i8> %b) { ; GENERIC-LABEL: or_v64i8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: or_v64i8: ; SKX: # %bb.0: -; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq %res = or <64 x i8> %a, %b ret <64 x i8> %res } @@ -5233,13 +5229,13 @@ define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) { ; GENERIC-LABEL: xor_v64i8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: xor_v64i8: ; SKX: # %bb.0: -; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq %res = xor <64 x i8> %a, %b ret <64 x i8> %res } @@ -5247,13 +5243,13 @@ define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) { ; GENERIC-LABEL: and_v32i16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: and_v32i16: ; SKX: # %bb.0: -; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq %res = and <32 x i16> %a, %b ret <32 x i16> %res } @@ -5261,13 +5257,13 @@ define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) { ; GENERIC-LABEL: andn_v32i16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: andn_v32i16: ; SKX: # %bb.0: -; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq %b2 = xor <32 x i16> %b, %res = and <32 x i16> %a, %b2 @@ -5277,13 +5273,13 @@ define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) { ; GENERIC-LABEL: or_v32i16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: or_v32i16: ; SKX: # %bb.0: -; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq %res = or <32 x i16> %a, %b ret <32 x i16> %res } @@ -5291,13 +5287,13 @@ define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) { ; GENERIC-LABEL: xor_v32i16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: xor_v32i16: ; SKX: # %bb.0: -; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq %res = xor <32 x i16> %a, %b ret <32 x i16> %res } @@ -5305,17 +5301,17 @@ define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) { ; GENERIC-LABEL: masked_and_v16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} +; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: masked_and_v16f32: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] -; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} +; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 +; SKX-NEXT: retq %a1 = bitcast <16 x float> %a to <16 x i32> %b1 = bitcast <16 x float> %b to <16 x i32> %passThru1 = bitcast <16 x float> %passThru to <16 x i32> @@ -5330,17 +5326,17 @@ define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) { ; GENERIC-LABEL: masked_or_v16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} +; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: masked_or_v16f32: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] -; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} +; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 +; SKX-NEXT: retq %a1 = bitcast <16 x float> %a to <16 x i32> %b1 = bitcast <16 x float> %b to <16 x i32> %passThru1 = bitcast <16 x float> %passThru to <16 x i32> @@ -5355,17 +5351,17 @@ define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) { ; GENERIC-LABEL: masked_xor_v16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} +; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: masked_xor_v16f32: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] -; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} +; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 +; SKX-NEXT: retq %a1 = bitcast <16 x float> %a to <16 x i32> %b1 = bitcast <16 x float> %b to <16 x i32> %passThru1 = bitcast <16 x float> %passThru to <16 x i32> @@ -5380,17 +5376,17 @@ define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) { ; GENERIC-LABEL: masked_and_v8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} +; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: masked_and_v8f64: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] -; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} +; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 +; SKX-NEXT: retq %a1 = bitcast <8 x double> %a to <8 x i64> %b1 = bitcast <8 x double> %b to <8 x i64> %passThru1 = bitcast <8 x double> %passThru to <8 x i64> @@ -5405,17 +5401,17 @@ define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) { ; GENERIC-LABEL: masked_or_v8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} +; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: masked_or_v8f64: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] -; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} +; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 +; SKX-NEXT: retq %a1 = bitcast <8 x double> %a to <8 x i64> %b1 = bitcast <8 x double> %b to <8 x i64> %passThru1 = bitcast <8 x double> %passThru to <8 x i64> @@ -5430,17 +5426,17 @@ define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) { ; GENERIC-LABEL: masked_xor_v8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} +; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: masked_xor_v8f64: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] -; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} +; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 +; SKX-NEXT: retq %a1 = bitcast <8 x double> %a to <8 x i64> %b1 = bitcast <8 x double> %b to <8 x i64> %passThru1 = bitcast <8 x double> %passThru to <8 x i64> @@ -5455,15 +5451,15 @@ define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) { ; GENERIC-LABEL: test_mm512_mask_and_epi32: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_mask_and_epi32: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq entry: %and1.i.i = and <8 x i64> %__a, %__b %0 = bitcast <8 x i64> %and1.i.i to <16 x i32> @@ -5477,15 +5473,15 @@ define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) { ; GENERIC-LABEL: test_mm512_mask_or_epi32: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_mask_or_epi32: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq entry: %or1.i.i = or <8 x i64> %__a, %__b %0 = bitcast <8 x i64> %or1.i.i to <16 x i32> @@ -5499,15 +5495,15 @@ define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) { ; GENERIC-LABEL: test_mm512_mask_xor_epi32: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_mask_xor_epi32: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq entry: %xor1.i.i = xor <8 x i64> %__a, %__b %0 = bitcast <8 x i64> %xor1.i.i to <16 x i32> @@ -5521,15 +5517,15 @@ define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; GENERIC-LABEL: test_mm512_mask_xor_pd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_mask_xor_pd: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq entry: %0 = bitcast <8 x double> %__A to <8 x i64> %1 = bitcast <8 x double> %__B to <8 x i64> @@ -5543,15 +5539,15 @@ define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; GENERIC-LABEL: test_mm512_maskz_xor_pd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_maskz_xor_pd: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq entry: %0 = bitcast <8 x double> %__A to <8 x i64> %1 = bitcast <8 x double> %__B to <8 x i64> @@ -5565,15 +5561,15 @@ define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; GENERIC-LABEL: test_mm512_mask_xor_ps: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_mask_xor_ps: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq entry: %0 = bitcast <16 x float> %__A to <16 x i32> %1 = bitcast <16 x float> %__B to <16 x i32> @@ -5587,15 +5583,15 @@ define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; GENERIC-LABEL: test_mm512_maskz_xor_ps: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_maskz_xor_ps: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq entry: %0 = bitcast <16 x float> %__A to <16 x i32> %1 = bitcast <16 x float> %__B to <16 x i32> @@ -5609,15 +5605,15 @@ define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; GENERIC-LABEL: test_mm512_mask_or_pd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_mask_or_pd: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq entry: %0 = bitcast <8 x double> %__A to <8 x i64> %1 = bitcast <8 x double> %__B to <8 x i64> @@ -5631,15 +5627,15 @@ define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; GENERIC-LABEL: test_mm512_maskz_or_pd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_maskz_or_pd: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq entry: %0 = bitcast <8 x double> %__A to <8 x i64> %1 = bitcast <8 x double> %__B to <8 x i64> @@ -5653,15 +5649,15 @@ define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; GENERIC-LABEL: test_mm512_mask_or_ps: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_mask_or_ps: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq entry: %0 = bitcast <16 x float> %__A to <16 x i32> %1 = bitcast <16 x float> %__B to <16 x i32> @@ -5675,15 +5671,15 @@ define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; GENERIC-LABEL: test_mm512_maskz_or_ps: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_maskz_or_ps: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq entry: %0 = bitcast <16 x float> %__A to <16 x i32> %1 = bitcast <16 x float> %__B to <16 x i32> @@ -5697,15 +5693,15 @@ define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; GENERIC-LABEL: test_mm512_mask_and_pd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_mask_and_pd: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq entry: %0 = bitcast <8 x double> %__A to <8 x i64> %1 = bitcast <8 x double> %__B to <8 x i64> @@ -5719,15 +5715,15 @@ define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; GENERIC-LABEL: test_mm512_maskz_and_pd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_maskz_and_pd: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq entry: %0 = bitcast <8 x double> %__A to <8 x i64> %1 = bitcast <8 x double> %__B to <8 x i64> @@ -5741,15 +5737,15 @@ define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; GENERIC-LABEL: test_mm512_mask_and_ps: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_mask_and_ps: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq entry: %0 = bitcast <16 x float> %__A to <16 x i32> %1 = bitcast <16 x float> %__B to <16 x i32> @@ -5763,15 +5759,15 @@ define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; GENERIC-LABEL: test_mm512_maskz_and_ps: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_maskz_and_ps: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq entry: %0 = bitcast <16 x float> %__A to <16 x i32> %1 = bitcast <16 x float> %__B to <16 x i32> @@ -5785,15 +5781,15 @@ define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; GENERIC-LABEL: test_mm512_mask_andnot_pd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_mask_andnot_pd: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq entry: %0 = bitcast <8 x double> %__A to <8 x i64> %neg.i.i = xor <8 x i64> %0, @@ -5808,15 +5804,15 @@ define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; GENERIC-LABEL: test_mm512_maskz_andnot_pd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_maskz_andnot_pd: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq entry: %0 = bitcast <8 x double> %__A to <8 x i64> %neg.i.i = xor <8 x i64> %0, @@ -5831,15 +5827,15 @@ define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; GENERIC-LABEL: test_mm512_mask_andnot_ps: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_mask_andnot_ps: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} +; SKX-NEXT: retq entry: %0 = bitcast <16 x float> %__A to <16 x i32> %neg.i.i = xor <16 x i32> %0, @@ -5854,15 +5850,15 @@ define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; GENERIC-LABEL: test_mm512_maskz_andnot_ps: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k1 +; GENERIC-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_maskz_andnot_ps: ; SKX: # %bb.0: # %entry -; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k1 +; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq entry: %0 = bitcast <16 x float> %__A to <16 x i32> %neg.i.i = xor <16 x i32> %0, @@ -5877,13 +5873,13 @@ define i32 @mov_test1(float %x) { ; GENERIC-LABEL: mov_test1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovd %xmm0, %eax +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test1: ; SKX: # %bb.0: -; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovd %xmm0, %eax +; SKX-NEXT: retq %res = bitcast float %x to i32 ret i32 %res } @@ -5891,13 +5887,13 @@ define <4 x i32> @mov_test2(i32 %x) { ; GENERIC-LABEL: mov_test2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovd %edi, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test2: ; SKX: # %bb.0: -; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovd %edi, %xmm0 +; SKX-NEXT: retq %res = insertelement <4 x i32>undef, i32 %x, i32 0 ret <4 x i32>%res } @@ -5905,13 +5901,13 @@ define <2 x i64> @mov_test3(i64 %x) { ; GENERIC-LABEL: mov_test3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovq %rdi, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test3: ; SKX: # %bb.0: -; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovq %rdi, %xmm0 +; SKX-NEXT: retq %res = insertelement <2 x i64>undef, i64 %x, i32 0 ret <2 x i64>%res } @@ -5919,13 +5915,13 @@ define <4 x i32> @mov_test4(i32* %x) { ; GENERIC-LABEL: mov_test4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test4: ; SKX: # %bb.0: -; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SKX-NEXT: retq %y = load i32, i32* %x %res = insertelement <4 x i32>undef, i32 %y, i32 0 ret <4 x i32>%res @@ -5934,13 +5930,13 @@ define void @mov_test5(float %x, float* %y) { ; GENERIC-LABEL: mov_test5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovss %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovss %xmm0, (%rdi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test5: ; SKX: # %bb.0: -; SKX-NEXT: vmovss %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovss %xmm0, (%rdi) +; SKX-NEXT: retq store float %x, float* %y, align 4 ret void } @@ -5948,13 +5944,13 @@ define void @mov_test6(double %x, double* %y) { ; GENERIC-LABEL: mov_test6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovsd %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovsd %xmm0, (%rdi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test6: ; SKX: # %bb.0: -; SKX-NEXT: vmovsd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovsd %xmm0, (%rdi) +; SKX-NEXT: retq store double %x, double* %y, align 8 ret void } @@ -5962,13 +5958,13 @@ define float @mov_test7(i32* %x) { ; GENERIC-LABEL: mov_test7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test7: ; SKX: # %bb.0: -; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SKX-NEXT: retq %y = load i32, i32* %x %res = bitcast i32 %y to float ret float %res @@ -5977,13 +5973,13 @@ define i32 @mov_test8(<4 x i32> %x) { ; GENERIC-LABEL: mov_test8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovd %xmm0, %eax +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test8: ; SKX: # %bb.0: -; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovd %xmm0, %eax +; SKX-NEXT: retq %res = extractelement <4 x i32> %x, i32 0 ret i32 %res } @@ -5991,13 +5987,13 @@ define i64 @mov_test9(<2 x i64> %x) { ; GENERIC-LABEL: mov_test9: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovq %xmm0, %rax +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test9: ; SKX: # %bb.0: -; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovq %xmm0, %rax +; SKX-NEXT: retq %res = extractelement <2 x i64> %x, i32 0 ret i64 %res } @@ -6005,13 +6001,13 @@ define <4 x i32> @mov_test10(i32* %x) { ; GENERIC-LABEL: mov_test10: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test10: ; SKX: # %bb.0: -; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SKX-NEXT: retq %y = load i32, i32* %x, align 4 %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 ret <4 x i32>%res @@ -6020,13 +6016,13 @@ define <4 x float> @mov_test11(float* %x) { ; GENERIC-LABEL: mov_test11: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test11: ; SKX: # %bb.0: -; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SKX-NEXT: retq %y = load float, float* %x, align 4 %res = insertelement <4 x float>zeroinitializer, float %y, i32 0 ret <4 x float>%res @@ -6035,13 +6031,13 @@ define <2 x double> @mov_test12(double* %x) { ; GENERIC-LABEL: mov_test12: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test12: ; SKX: # %bb.0: -; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; SKX-NEXT: retq %y = load double, double* %x, align 8 %res = insertelement <2 x double>zeroinitializer, double %y, i32 0 ret <2 x double>%res @@ -6050,13 +6046,13 @@ define <2 x i64> @mov_test13(i64 %x) { ; GENERIC-LABEL: mov_test13: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovq %rdi, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test13: ; SKX: # %bb.0: -; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovq %rdi, %xmm0 +; SKX-NEXT: retq %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0 ret <2 x i64>%res } @@ -6064,13 +6060,13 @@ define <4 x i32> @mov_test14(i32 %x) { ; GENERIC-LABEL: mov_test14: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovd %edi, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test14: ; SKX: # %bb.0: -; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovd %edi, %xmm0 +; SKX-NEXT: retq %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0 ret <4 x i32>%res } @@ -6078,13 +6074,13 @@ define <4 x i32> @mov_test15(i32* %x) { ; GENERIC-LABEL: mov_test15: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test15: ; SKX: # %bb.0: -; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SKX-NEXT: retq %y = load i32, i32* %x, align 4 %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 ret <4 x i32>%res @@ -6093,13 +6089,13 @@ define <16 x i32> @mov_test16(i8 * %addr) { ; GENERIC-LABEL: mov_test16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovups (%rdi), %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test16: ; SKX: # %bb.0: -; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovups (%rdi), %zmm0 +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <16 x i32>* %res = load <16 x i32>, <16 x i32>* %vaddr, align 1 ret <16 x i32>%res @@ -6108,13 +6104,13 @@ define <16 x i32> @mov_test17(i8 * %addr) { ; GENERIC-LABEL: mov_test17: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps (%rdi), %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test17: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps (%rdi), %zmm0 +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <16 x i32>* %res = load <16 x i32>, <16 x i32>* %vaddr, align 64 ret <16 x i32>%res @@ -6123,15 +6119,15 @@ define void @mov_test18(i8 * %addr, <8 x i64> %data) { ; GENERIC-LABEL: mov_test18: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps %zmm0, (%rdi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test18: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps %zmm0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <8 x i64>* store <8 x i64>%data, <8 x i64>* %vaddr, align 64 ret void @@ -6140,15 +6136,15 @@ define void @mov_test19(i8 * %addr, <16 x i32> %data) { ; GENERIC-LABEL: mov_test19: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovups %zmm0, (%rdi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test19: ; SKX: # %bb.0: -; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovups %zmm0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <16 x i32>* store <16 x i32>%data, <16 x i32>* %vaddr, align 1 ret void @@ -6157,15 +6153,15 @@ define void @mov_test20(i8 * %addr, <16 x i32> %data) { ; GENERIC-LABEL: mov_test20: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps %zmm0, (%rdi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test20: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps %zmm0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <16 x i32>* store <16 x i32>%data, <16 x i32>* %vaddr, align 64 ret void @@ -6174,13 +6170,13 @@ define <8 x i64> @mov_test21(i8 * %addr) { ; GENERIC-LABEL: mov_test21: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps (%rdi), %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test21: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps (%rdi), %zmm0 +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <8 x i64>* %res = load <8 x i64>, <8 x i64>* %vaddr, align 64 ret <8 x i64>%res @@ -6189,15 +6185,15 @@ define void @mov_test22(i8 * %addr, <8 x i64> %data) { ; GENERIC-LABEL: mov_test22: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovups %zmm0, (%rdi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test22: ; SKX: # %bb.0: -; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovups %zmm0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <8 x i64>* store <8 x i64>%data, <8 x i64>* %vaddr, align 1 ret void @@ -6206,13 +6202,13 @@ define <8 x i64> @mov_test23(i8 * %addr) { ; GENERIC-LABEL: mov_test23: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovups (%rdi), %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test23: ; SKX: # %bb.0: -; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovups (%rdi), %zmm0 +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <8 x i64>* %res = load <8 x i64>, <8 x i64>* %vaddr, align 1 ret <8 x i64>%res @@ -6221,15 +6217,15 @@ define void @mov_test24(i8 * %addr, <8 x double> %data) { ; GENERIC-LABEL: mov_test24: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps %zmm0, (%rdi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test24: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps %zmm0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <8 x double>* store <8 x double>%data, <8 x double>* %vaddr, align 64 ret void @@ -6238,13 +6234,13 @@ define <8 x double> @mov_test25(i8 * %addr) { ; GENERIC-LABEL: mov_test25: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps (%rdi), %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test25: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps (%rdi), %zmm0 +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <8 x double>* %res = load <8 x double>, <8 x double>* %vaddr, align 64 ret <8 x double>%res @@ -6253,15 +6249,15 @@ define void @mov_test26(i8 * %addr, <16 x float> %data) { ; GENERIC-LABEL: mov_test26: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps %zmm0, (%rdi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test26: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps %zmm0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <16 x float>* store <16 x float>%data, <16 x float>* %vaddr, align 64 ret void @@ -6270,13 +6266,13 @@ define <16 x float> @mov_test27(i8 * %addr) { ; GENERIC-LABEL: mov_test27: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps (%rdi), %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test27: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps (%rdi), %zmm0 +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <16 x float>* %res = load <16 x float>, <16 x float>* %vaddr, align 64 ret <16 x float>%res @@ -6285,15 +6281,15 @@ define void @mov_test28(i8 * %addr, <8 x double> %data) { ; GENERIC-LABEL: mov_test28: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovups %zmm0, (%rdi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test28: ; SKX: # %bb.0: -; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovups %zmm0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <8 x double>* store <8 x double>%data, <8 x double>* %vaddr, align 1 ret void @@ -6302,13 +6298,13 @@ define <8 x double> @mov_test29(i8 * %addr) { ; GENERIC-LABEL: mov_test29: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovups (%rdi), %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test29: ; SKX: # %bb.0: -; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovups (%rdi), %zmm0 +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <8 x double>* %res = load <8 x double>, <8 x double>* %vaddr, align 1 ret <8 x double>%res @@ -6317,15 +6313,15 @@ define void @mov_test30(i8 * %addr, <16 x float> %data) { ; GENERIC-LABEL: mov_test30: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovups %zmm0, (%rdi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test30: ; SKX: # %bb.0: -; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovups %zmm0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <16 x float>* store <16 x float>%data, <16 x float>* %vaddr, align 1 ret void @@ -6334,13 +6330,13 @@ define <16 x float> @mov_test31(i8 * %addr) { ; GENERIC-LABEL: mov_test31: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovups (%rdi), %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test31: ; SKX: # %bb.0: -; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovups (%rdi), %zmm0 +; SKX-NEXT: retq %vaddr = bitcast i8* %addr to <16 x float>* %res = load <16 x float>, <16 x float>* %vaddr, align 1 ret <16 x float>%res @@ -6349,15 +6345,15 @@ define <16 x i32> @mov_test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { ; GENERIC-LABEL: mov_test32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test32: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* %r = load <16 x i32>, <16 x i32>* %vaddr, align 64 @@ -6368,15 +6364,15 @@ define <16 x i32> @mov_test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { ; GENERIC-LABEL: mov_test33: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test33: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* %r = load <16 x i32>, <16 x i32>* %vaddr, align 1 @@ -6387,15 +6383,15 @@ define <16 x i32> @mov_test34(i8 * %addr, <16 x i32> %mask1) { ; GENERIC-LABEL: mov_test34: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test34: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; SKX-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* %r = load <16 x i32>, <16 x i32>* %vaddr, align 64 @@ -6406,15 +6402,15 @@ define <16 x i32> @mov_test35(i8 * %addr, <16 x i32> %mask1) { ; GENERIC-LABEL: mov_test35: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test35: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; SKX-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* %r = load <16 x i32>, <16 x i32>* %vaddr, align 1 @@ -6425,15 +6421,15 @@ define <8 x i64> @mov_test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { ; GENERIC-LABEL: mov_test36: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test36: ; SKX: # %bb.0: -; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* %r = load <8 x i64>, <8 x i64>* %vaddr, align 64 @@ -6444,15 +6440,15 @@ define <8 x i64> @mov_test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { ; GENERIC-LABEL: mov_test37: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test37: ; SKX: # %bb.0: -; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} +; SKX-NEXT: retq %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* %r = load <8 x i64>, <8 x i64>* %vaddr, align 1 @@ -6463,15 +6459,15 @@ define <8 x i64> @mov_test38(i8 * %addr, <8 x i64> %mask1) { ; GENERIC-LABEL: mov_test38: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test38: ; SKX: # %bb.0: -; SKX-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* %r = load <8 x i64>, <8 x i64>* %vaddr, align 64 @@ -6482,15 +6478,15 @@ define <8 x i64> @mov_test39(i8 * %addr, <8 x i64> %mask1) { ; GENERIC-LABEL: mov_test39: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test39: ; SKX: # %bb.0: -; SKX-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* %r = load <8 x i64>, <8 x i64>* %vaddr, align 1 @@ -6501,17 +6497,17 @@ define <16 x float> @mov_test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) { ; GENERIC-LABEL: mov_test40: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [0:0.25] -; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 +; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test40: ; SKX: # %bb.0: -; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] -; SKX-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 +; SKX-NEXT: vmovaps (%rdi), %zmm0 {%k1} +; SKX-NEXT: retq %mask = fcmp one <16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* %r = load <16 x float>, <16 x float>* %vaddr, align 64 @@ -6522,17 +6518,17 @@ define <16 x float> @mov_test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) { ; GENERIC-LABEL: mov_test41: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [0:0.25] -; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 +; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test41: ; SKX: # %bb.0: -; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] -; SKX-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 +; SKX-NEXT: vmovups (%rdi), %zmm0 {%k1} +; SKX-NEXT: retq %mask = fcmp one <16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* %r = load <16 x float>, <16 x float>* %vaddr, align 1 @@ -6543,17 +6539,17 @@ define <16 x float> @mov_test42(i8 * %addr, <16 x float> %mask1) { ; GENERIC-LABEL: mov_test42: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 +; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test42: ; SKX: # %bb.0: -; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 +; SKX-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = fcmp one <16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* %r = load <16 x float>, <16 x float>* %vaddr, align 64 @@ -6564,17 +6560,17 @@ define <16 x float> @mov_test43(i8 * %addr, <16 x float> %mask1) { ; GENERIC-LABEL: mov_test43: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 +; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test43: ; SKX: # %bb.0: -; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 +; SKX-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = fcmp one <16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* %r = load <16 x float>, <16 x float>* %vaddr, align 1 @@ -6585,17 +6581,17 @@ define <8 x double> @mov_test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) { ; GENERIC-LABEL: mov_test44: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [0:0.25] -; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 +; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test44: ; SKX: # %bb.0: -; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33] -; SKX-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 +; SKX-NEXT: vmovapd (%rdi), %zmm0 {%k1} +; SKX-NEXT: retq %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* %r = load <8 x double>, <8 x double>* %vaddr, align 64 @@ -6606,17 +6602,17 @@ define <8 x double> @mov_test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) { ; GENERIC-LABEL: mov_test45: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [0:0.25] -; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 +; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test45: ; SKX: # %bb.0: -; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33] -; SKX-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 +; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1} +; SKX-NEXT: retq %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* %r = load <8 x double>, <8 x double>* %vaddr, align 1 @@ -6627,17 +6623,17 @@ define <8 x double> @mov_test46(i8 * %addr, <8 x double> %mask1) { ; GENERIC-LABEL: mov_test46: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 +; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test46: ; SKX: # %bb.0: -; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 +; SKX-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* %r = load <8 x double>, <8 x double>* %vaddr, align 64 @@ -6648,17 +6644,17 @@ define <8 x double> @mov_test47(i8 * %addr, <8 x double> %mask1) { ; GENERIC-LABEL: mov_test47: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 +; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: mov_test47: ; SKX: # %bb.0: -; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 +; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* %r = load <8 x double>, <8 x double>* %vaddr, align 1 @@ -6669,17 +6665,17 @@ define i16 @mask16(i16 %x) { ; GENERIC-LABEL: mask16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: notl %eax # sched: [1:0.33] +; GENERIC-NEXT: movl %edi, %eax +; GENERIC-NEXT: notl %eax ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: mask16: ; SKX: # %bb.0: -; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-NEXT: notl %eax # sched: [1:0.25] +; SKX-NEXT: movl %edi, %eax +; SKX-NEXT: notl %eax ; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %m0 = bitcast i16 %x to <16 x i1> %m1 = xor <16 x i1> %m0, %ret = bitcast <16 x i1> %m1 to i16 @@ -6689,15 +6685,15 @@ define i32 @mask16_zext(i16 %x) { ; GENERIC-LABEL: mask16_zext: ; GENERIC: # %bb.0: -; GENERIC-NEXT: notl %edi # sched: [1:0.33] -; GENERIC-NEXT: movzwl %di, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: notl %edi +; GENERIC-NEXT: movzwl %di, %eax +; GENERIC-NEXT: retq ; ; SKX-LABEL: mask16_zext: ; SKX: # %bb.0: -; SKX-NEXT: notl %edi # sched: [1:0.25] -; SKX-NEXT: movzwl %di, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: notl %edi +; SKX-NEXT: movzwl %di, %eax +; SKX-NEXT: retq %m0 = bitcast i16 %x to <16 x i1> %m1 = xor <16 x i1> %m0, %m2 = bitcast <16 x i1> %m1 to i16 @@ -6708,17 +6704,17 @@ define i8 @mask8(i8 %x) { ; GENERIC-LABEL: mask8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: notb %al # sched: [1:0.33] +; GENERIC-NEXT: movl %edi, %eax +; GENERIC-NEXT: notb %al ; GENERIC-NEXT: # kill: def $al killed $al killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: mask8: ; SKX: # %bb.0: -; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-NEXT: notb %al # sched: [1:0.25] +; SKX-NEXT: movl %edi, %eax +; SKX-NEXT: notb %al ; SKX-NEXT: # kill: def $al killed $al killed $eax -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %m0 = bitcast i8 %x to <8 x i1> %m1 = xor <8 x i1> %m0, %ret = bitcast <8 x i1> %m1 to i8 @@ -6728,15 +6724,15 @@ define i32 @mask8_zext(i8 %x) { ; GENERIC-LABEL: mask8_zext: ; GENERIC: # %bb.0: -; GENERIC-NEXT: notb %dil # sched: [1:0.33] -; GENERIC-NEXT: movzbl %dil, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: notb %dil +; GENERIC-NEXT: movzbl %dil, %eax +; GENERIC-NEXT: retq ; ; SKX-LABEL: mask8_zext: ; SKX: # %bb.0: -; SKX-NEXT: notb %dil # sched: [1:0.25] -; SKX-NEXT: movzbl %dil, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: notb %dil +; SKX-NEXT: movzbl %dil, %eax +; SKX-NEXT: retq %m0 = bitcast i8 %x to <8 x i1> %m1 = xor <8 x i1> %m0, %m2 = bitcast <8 x i1> %m1 to i8 @@ -6747,17 +6743,17 @@ define void @mask16_mem(i16* %ptr) { ; GENERIC-LABEL: mask16_mem: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] -; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovw (%rdi), %k0 +; GENERIC-NEXT: knotw %k0, %k0 +; GENERIC-NEXT: kmovw %k0, (%rdi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: mask16_mem: ; SKX: # %bb.0: -; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00] -; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovw (%rdi), %k0 +; SKX-NEXT: knotw %k0, %k0 +; SKX-NEXT: kmovw %k0, (%rdi) +; SKX-NEXT: retq %x = load i16, i16* %ptr, align 4 %m0 = bitcast i16 %x to <16 x i1> %m1 = xor <16 x i1> %m0, @@ -6769,17 +6765,17 @@ define void @mask8_mem(i8* %ptr) { ; GENERIC-LABEL: mask8_mem: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] -; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovb (%rdi), %k0 +; GENERIC-NEXT: knotb %k0, %k0 +; GENERIC-NEXT: kmovb %k0, (%rdi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: mask8_mem: ; SKX: # %bb.0: -; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] -; SKX-NEXT: knotb %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovb (%rdi), %k0 +; SKX-NEXT: knotb %k0, %k0 +; SKX-NEXT: kmovb %k0, (%rdi) +; SKX-NEXT: retq %x = load i8, i8* %ptr, align 4 %m0 = bitcast i8 %x to <8 x i1> %m1 = xor <8 x i1> %m0, @@ -6791,23 +6787,23 @@ define i16 @mand16(i16 %x, i16 %y) { ; GENERIC-LABEL: mand16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: andl %esi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: xorl %esi, %eax # sched: [1:0.33] -; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: movl %edi, %eax +; GENERIC-NEXT: movl %edi, %ecx +; GENERIC-NEXT: andl %esi, %ecx +; GENERIC-NEXT: xorl %esi, %eax +; GENERIC-NEXT: orl %ecx, %eax ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: mand16: ; SKX: # %bb.0: -; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25] -; SKX-NEXT: andl %esi, %ecx # sched: [1:0.25] -; SKX-NEXT: xorl %esi, %eax # sched: [1:0.25] -; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25] +; SKX-NEXT: movl %edi, %eax +; SKX-NEXT: movl %edi, %ecx +; SKX-NEXT: andl %esi, %ecx +; SKX-NEXT: xorl %esi, %eax +; SKX-NEXT: orl %ecx, %eax ; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %ma = bitcast i16 %x to <16 x i1> %mb = bitcast i16 %y to <16 x i1> %mc = and <16 x i1> %ma, %mb @@ -6820,25 +6816,25 @@ define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) { ; GENERIC-LABEL: mand16_mem: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] -; GENERIC-NEXT: kmovw (%rsi), %k1 # sched: [5:0.50] -; GENERIC-NEXT: kandw %k1, %k0, %k2 # sched: [1:0.33] -; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: korw %k0, %k2, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] +; GENERIC-NEXT: kmovw (%rdi), %k0 +; GENERIC-NEXT: kmovw (%rsi), %k1 +; GENERIC-NEXT: kandw %k1, %k0, %k2 +; GENERIC-NEXT: kxorw %k1, %k0, %k0 +; GENERIC-NEXT: korw %k0, %k2, %k0 +; GENERIC-NEXT: kmovd %k0, %eax ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: mand16_mem: ; SKX: # %bb.0: -; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00] -; SKX-NEXT: kmovw (%rsi), %k1 # sched: [7:1.00] -; SKX-NEXT: kandw %k1, %k0, %k2 # sched: [1:1.00] -; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: korw %k0, %k2, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] +; SKX-NEXT: kmovw (%rdi), %k0 +; SKX-NEXT: kmovw (%rsi), %k1 +; SKX-NEXT: kandw %k1, %k0, %k2 +; SKX-NEXT: kxorw %k1, %k0, %k0 +; SKX-NEXT: korw %k0, %k2, %k0 +; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %ma = load <16 x i1>, <16 x i1>* %x %mb = load <16 x i1>, <16 x i1>* %y %mc = and <16 x i1> %ma, %mb @@ -6851,19 +6847,19 @@ define i8 @shuf_test1(i16 %v) nounwind { ; GENERIC-LABEL: shuf_test1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kshiftrw $8, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] +; GENERIC-NEXT: kmovd %edi, %k0 +; GENERIC-NEXT: kshiftrw $8, %k0, %k0 +; GENERIC-NEXT: kmovd %k0, %eax ; GENERIC-NEXT: # kill: def $al killed $al killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: shuf_test1: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] -; SKX-NEXT: kshiftrw $8, %k0, %k0 # sched: [3:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: kshiftrw $8, %k0, %k0 +; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: # kill: def $al killed $al killed $eax -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %v1 = bitcast i16 %v to <16 x i1> %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> %mask1 = bitcast <8 x i1> %mask to i8 @@ -6873,21 +6869,21 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) { ; GENERIC-LABEL: zext_test1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50] -; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] -; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 +; GENERIC-NEXT: kshiftrw $5, %k0, %k0 +; GENERIC-NEXT: kmovd %k0, %eax +; GENERIC-NEXT: andl $1, %eax +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_test1: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] -; SKX-NEXT: andl $1, %eax # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 +; SKX-NEXT: kshiftrw $5, %k0, %k0 +; SKX-NEXT: kmovd %k0, %eax +; SKX-NEXT: andl $1, %eax +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %cmp_res = icmp ugt <16 x i32> %a, %b %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 %res = zext i1 %cmp_res.i1 to i32 @@ -6897,23 +6893,23 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) { ; GENERIC-LABEL: zext_test2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50] -; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] -; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33] +; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 +; GENERIC-NEXT: kshiftrw $5, %k0, %k0 +; GENERIC-NEXT: kmovd %k0, %eax +; GENERIC-NEXT: andl $1, %eax ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_test2: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] -; SKX-NEXT: andl $1, %eax # sched: [1:0.25] +; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 +; SKX-NEXT: kshiftrw $5, %k0, %k0 +; SKX-NEXT: kmovd %k0, %eax +; SKX-NEXT: andl $1, %eax ; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %cmp_res = icmp ugt <16 x i32> %a, %b %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 %res = zext i1 %cmp_res.i1 to i16 @@ -6923,23 +6919,23 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) { ; GENERIC-LABEL: zext_test3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50] -; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] -; GENERIC-NEXT: andb $1, %al # sched: [1:0.33] +; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 +; GENERIC-NEXT: kshiftrw $5, %k0, %k0 +; GENERIC-NEXT: kmovd %k0, %eax +; GENERIC-NEXT: andb $1, %al ; GENERIC-NEXT: # kill: def $al killed $al killed $eax -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: zext_test3: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] -; SKX-NEXT: andb $1, %al # sched: [1:0.25] +; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 +; SKX-NEXT: kshiftrw $5, %k0, %k0 +; SKX-NEXT: kmovd %k0, %eax +; SKX-NEXT: andb $1, %al ; SKX-NEXT: # kill: def $al killed $al killed $eax -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %cmp_res = icmp ugt <16 x i32> %a, %b %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 %res = zext i1 %cmp_res.i1 to i8 @@ -6949,17 +6945,17 @@ define i8 @conv1(<8 x i1>* %R) { ; GENERIC-LABEL: conv1: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movb $-1, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; GENERIC-NEXT: movb $-2, %al # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: movb $-1, (%rdi) +; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) +; GENERIC-NEXT: movb $-2, %al +; GENERIC-NEXT: retq ; ; SKX-LABEL: conv1: ; SKX: # %bb.0: # %entry -; SKX-NEXT: movb $-1, (%rdi) # sched: [1:1.00] -; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SKX-NEXT: movb $-2, %al # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: movb $-1, (%rdi) +; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp) +; SKX-NEXT: movb $-2, %al +; SKX-NEXT: retq entry: store <8 x i1> , <8 x i1>* %R @@ -6973,19 +6969,19 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) { ; GENERIC-LABEL: test4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [1:0.50] -; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [1:0.50] -; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpcmpleq %ymm1, %ymm0, %k1 +; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} +; GENERIC-NEXT: vpmovm2d %k0, %xmm0 +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: test4: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00] -; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k1 +; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} +; SKX-NEXT: vpmovm2d %k0, %xmm0 +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %x_gt_y = icmp sgt <4 x i64> %x, %y %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1 %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1 @@ -6996,17 +6992,17 @@ define <2 x i64> @vcmp_test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) { ; GENERIC-LABEL: vcmp_test5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [1:0.50] -; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [1:0.50] -; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpcmpleq %xmm3, %xmm2, %k1 +; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} +; GENERIC-NEXT: vpmovm2q %k0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vcmp_test5: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [3:1.00] -; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k1 +; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} +; SKX-NEXT: vpmovm2q %k0, %xmm0 +; SKX-NEXT: retq %x_gt_y = icmp slt <2 x i64> %x, %y %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1 %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1 @@ -7028,19 +7024,19 @@ define void @vcmp_test7(<8 x i1> %mask) { ; GENERIC-LABEL: vcmp_test7: ; GENERIC: # %bb.0: # %allocas -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] -; GENERIC-NEXT: orb $85, %al # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k0 +; GENERIC-NEXT: kmovd %k0, %eax +; GENERIC-NEXT: orb $85, %al +; GENERIC-NEXT: retq ; ; SKX-LABEL: vcmp_test7: ; SKX: # %bb.0: # %allocas -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] -; SKX-NEXT: orb $85, %al # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k0 +; SKX-NEXT: kmovd %k0, %eax +; SKX-NEXT: orb $85, %al +; SKX-NEXT: retq allocas: %a= or <8 x i1> %mask, %b = bitcast <8 x i1> %a to i8 @@ -7056,35 +7052,35 @@ define <16 x i8> @vcmp_test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) { ; GENERIC-LABEL: vcmp_test8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] -; GENERIC-NEXT: jg .LBB386_1 # sched: [1:1.00] +; GENERIC-NEXT: cmpl %esi, %edi +; GENERIC-NEXT: jg .LBB386_1 ; GENERIC-NEXT: # %bb.2: -; GENERIC-NEXT: kxorw %k0, %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kxorw %k0, %k0, %k0 +; GENERIC-NEXT: vpmovm2b %k0, %xmm0 +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; GENERIC-NEXT: .LBB386_1: -; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25] -; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [1:0.50] -; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; GENERIC-NEXT: vpmovm2b %k0, %xmm0 +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: vcmp_test8: ; SKX: # %bb.0: -; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] -; SKX-NEXT: jg .LBB386_1 # sched: [1:0.50] +; SKX-NEXT: cmpl %esi, %edi +; SKX-NEXT: jg .LBB386_1 ; SKX-NEXT: # %bb.2: -; SKX-NEXT: kxorw %k0, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kxorw %k0, %k0, %k0 +; SKX-NEXT: vpmovm2b %k0, %xmm0 +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq ; SKX-NEXT: .LBB386_1: -; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; SKX-NEXT: vpmovm2b %k0, %xmm0 +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %cond = icmp sgt i32 %a1, %b1 %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer %cmp2 = icmp ult <16 x i32> %b, zeroinitializer @@ -7095,31 +7091,31 @@ define <16 x i1> @vpmov_test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) { ; GENERIC-LABEL: vpmov_test9: ; GENERIC: # %bb.0: -; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] -; GENERIC-NEXT: jg .LBB387_1 # sched: [1:1.00] +; GENERIC-NEXT: cmpl %esi, %edi +; GENERIC-NEXT: jg .LBB387_1 ; GENERIC-NEXT: # %bb.2: -; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: jmp .LBB387_3 # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm0 +; GENERIC-NEXT: jmp .LBB387_3 ; GENERIC-NEXT: .LBB387_1: -; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 ; GENERIC-NEXT: .LBB387_3: -; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovb2m %xmm0, %k0 +; GENERIC-NEXT: vpmovm2b %k0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vpmov_test9: ; SKX: # %bb.0: -; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] -; SKX-NEXT: jg .LBB387_1 # sched: [1:0.50] +; SKX-NEXT: cmpl %esi, %edi +; SKX-NEXT: jg .LBB387_1 ; SKX-NEXT: # %bb.2: -; SKX-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:0.50] -; SKX-NEXT: jmp .LBB387_3 # sched: [1:0.50] +; SKX-NEXT: vpsllw $7, %xmm1, %xmm0 +; SKX-NEXT: jmp .LBB387_3 ; SKX-NEXT: .LBB387_1: -; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 ; SKX-NEXT: .LBB387_3: -; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovb2m %xmm0, %k0 +; SKX-NEXT: vpmovm2b %k0, %xmm0 +; SKX-NEXT: retq %mask = icmp sgt i32 %a1, %b1 %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b ret <16 x i1>%c @@ -7132,31 +7128,31 @@ define <4 x i1> @vmov_test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) { ; GENERIC-LABEL: vmov_test11: ; GENERIC: # %bb.0: -; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] -; GENERIC-NEXT: jg .LBB389_1 # sched: [1:1.00] +; GENERIC-NEXT: cmpl %esi, %edi +; GENERIC-NEXT: jg .LBB389_1 ; GENERIC-NEXT: # %bb.2: -; GENERIC-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: jmp .LBB389_3 # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm1, %xmm0 +; GENERIC-NEXT: jmp .LBB389_3 ; GENERIC-NEXT: .LBB389_1: -; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 ; GENERIC-NEXT: .LBB389_3: -; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm0, %k0 +; GENERIC-NEXT: vpmovm2d %k0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vmov_test11: ; SKX: # %bb.0: -; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] -; SKX-NEXT: jg .LBB389_1 # sched: [1:0.50] +; SKX-NEXT: cmpl %esi, %edi +; SKX-NEXT: jg .LBB389_1 ; SKX-NEXT: # %bb.2: -; SKX-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:0.50] -; SKX-NEXT: jmp .LBB389_3 # sched: [1:0.50] +; SKX-NEXT: vpslld $31, %xmm1, %xmm0 +; SKX-NEXT: jmp .LBB389_3 ; SKX-NEXT: .LBB389_1: -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 ; SKX-NEXT: .LBB389_3: -; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpmovd2m %xmm0, %k0 +; SKX-NEXT: vpmovm2d %k0, %xmm0 +; SKX-NEXT: retq %mask = icmp sgt i32 %a1, %b1 %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b ret <4 x i1>%c @@ -7165,13 +7161,13 @@ define i32 @vmov_test12(i32 %x, i32 %y) { ; GENERIC-LABEL: vmov_test12: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: movl %edi, %eax +; GENERIC-NEXT: retq ; ; SKX-LABEL: vmov_test12: ; SKX: # %bb.0: -; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: movl %edi, %eax +; SKX-NEXT: retq %a = bitcast i16 21845 to <16 x i1> %b = extractelement <16 x i1> %a, i32 0 %c = select i1 %b, i32 %x, i32 %y @@ -7181,13 +7177,13 @@ define i32 @vmov_test13(i32 %x, i32 %y) { ; GENERIC-LABEL: vmov_test13: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: movl %esi, %eax +; GENERIC-NEXT: retq ; ; SKX-LABEL: vmov_test13: ; SKX: # %bb.0: -; SKX-NEXT: movl %esi, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: movl %esi, %eax +; SKX-NEXT: retq %a = bitcast i16 21845 to <16 x i1> %b = extractelement <16 x i1> %a, i32 3 %c = select i1 %b, i32 %x, i32 %y @@ -7202,25 +7198,23 @@ define <16 x i1> @vmov_test15(i32 %x, i32 %y) { ; GENERIC-LABEL: vmov_test15: ; GENERIC: # %bb.0: -; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] +; GENERIC-NEXT: cmpl %esi, %edi ; GENERIC-NEXT: movl $21845, %eax # imm = 0x5555 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: movl $1, %ecx # sched: [1:0.33] -; GENERIC-NEXT: cmovgl %eax, %ecx # sched: [2:0.67] -; GENERIC-NEXT: kmovd %ecx, %k0 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: movl $1, %ecx +; GENERIC-NEXT: cmovgl %eax, %ecx +; GENERIC-NEXT: kmovd %ecx, %k0 +; GENERIC-NEXT: vpmovm2b %k0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vmov_test15: ; SKX: # %bb.0: -; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] +; SKX-NEXT: cmpl %esi, %edi ; SKX-NEXT: movl $21845, %eax # imm = 0x5555 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: movl $1, %ecx # sched: [1:0.25] -; SKX-NEXT: cmovgl %eax, %ecx # sched: [1:0.50] -; SKX-NEXT: kmovd %ecx, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: movl $1, %ecx +; SKX-NEXT: cmovgl %eax, %ecx +; SKX-NEXT: kmovd %ecx, %k0 +; SKX-NEXT: vpmovm2b %k0, %xmm0 +; SKX-NEXT: retq %a = bitcast i16 21845 to <16 x i1> %b = bitcast i16 1 to <16 x i1> %mask = icmp sgt i32 %x, %y @@ -7232,29 +7226,29 @@ ; ; GENERIC-LABEL: vmov_test16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33] -; GENERIC-NEXT: movb $1, %al # sched: [1:0.33] -; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00] -; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovq %rdi, %k0 +; GENERIC-NEXT: movb $1, %al +; GENERIC-NEXT: kmovd %eax, %k1 +; GENERIC-NEXT: kshiftrq $5, %k0, %k2 +; GENERIC-NEXT: kxorq %k1, %k2, %k1 +; GENERIC-NEXT: kshiftlq $63, %k1, %k1 +; GENERIC-NEXT: kshiftrq $58, %k1, %k1 +; GENERIC-NEXT: kxorq %k1, %k0, %k0 +; GENERIC-NEXT: vpmovm2b %k0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vmov_test16: ; SKX: # %bb.0: -; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00] -; SKX-NEXT: movb $1, %al # sched: [1:0.25] -; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] -; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00] -; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00] -; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00] -; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00] -; SKX-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovq %rdi, %k0 +; SKX-NEXT: movb $1, %al +; SKX-NEXT: kmovd %eax, %k1 +; SKX-NEXT: kshiftrq $5, %k0, %k2 +; SKX-NEXT: kxorq %k1, %k2, %k1 +; SKX-NEXT: kshiftlq $63, %k1, %k1 +; SKX-NEXT: kshiftrq $58, %k1, %k1 +; SKX-NEXT: kxorq %k1, %k0, %k0 +; SKX-NEXT: vpmovm2b %k0, %zmm0 +; SKX-NEXT: retq %a = bitcast i64 %x to <64 x i1> %b = insertelement <64 x i1>%a, i1 true, i32 5 %c = sext <64 x i1>%b to <64 x i8> @@ -7265,31 +7259,31 @@ ; ; GENERIC-LABEL: vmov_test17: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33] -; GENERIC-NEXT: cmpl %edx, %esi # sched: [1:0.33] -; GENERIC-NEXT: setg %al # sched: [1:0.50] -; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00] -; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovq %rdi, %k0 +; GENERIC-NEXT: cmpl %edx, %esi +; GENERIC-NEXT: setg %al +; GENERIC-NEXT: kmovd %eax, %k1 +; GENERIC-NEXT: kshiftrq $5, %k0, %k2 +; GENERIC-NEXT: kxorq %k1, %k2, %k1 +; GENERIC-NEXT: kshiftlq $63, %k1, %k1 +; GENERIC-NEXT: kshiftrq $58, %k1, %k1 +; GENERIC-NEXT: kxorq %k1, %k0, %k0 +; GENERIC-NEXT: vpmovm2b %k0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vmov_test17: ; SKX: # %bb.0: -; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00] -; SKX-NEXT: cmpl %edx, %esi # sched: [1:0.25] -; SKX-NEXT: setg %al # sched: [1:0.50] -; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] -; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00] -; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00] -; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00] -; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00] -; SKX-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovq %rdi, %k0 +; SKX-NEXT: cmpl %edx, %esi +; SKX-NEXT: setg %al +; SKX-NEXT: kmovd %eax, %k1 +; SKX-NEXT: kshiftrq $5, %k0, %k2 +; SKX-NEXT: kxorq %k1, %k2, %k1 +; SKX-NEXT: kshiftlq $63, %k1, %k1 +; SKX-NEXT: kshiftrq $58, %k1, %k1 +; SKX-NEXT: kxorq %k1, %k0, %k0 +; SKX-NEXT: vpmovm2b %k0, %zmm0 +; SKX-NEXT: retq %a = bitcast i64 %x to <64 x i1> %b = icmp sgt i32 %y, %z %c = insertelement <64 x i1>%a, i1 %b, i32 5 @@ -7300,39 +7294,39 @@ define <8 x i1> @vmov_test18(i8 %a, i16 %y) { ; GENERIC-LABEL: vmov_test18: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kshiftrw $8, %k1, %k2 # sched: [1:1.00] -; GENERIC-NEXT: kshiftrw $9, %k1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: kshiftrb $6, %k0, %k3 # sched: [1:1.00] -; GENERIC-NEXT: kxorb %k1, %k3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kshiftlb $7, %k1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: kshiftrb $1, %k1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kshiftlb $1, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: kshiftrb $1, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: kshiftlb $7, %k2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: korb %k1, %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k0 +; GENERIC-NEXT: kmovd %esi, %k1 +; GENERIC-NEXT: kshiftrw $8, %k1, %k2 +; GENERIC-NEXT: kshiftrw $9, %k1, %k1 +; GENERIC-NEXT: kshiftrb $6, %k0, %k3 +; GENERIC-NEXT: kxorb %k1, %k3, %k1 +; GENERIC-NEXT: kshiftlb $7, %k1, %k1 +; GENERIC-NEXT: kshiftrb $1, %k1, %k1 +; GENERIC-NEXT: kxorb %k1, %k0, %k0 +; GENERIC-NEXT: kshiftlb $1, %k0, %k0 +; GENERIC-NEXT: kshiftrb $1, %k0, %k0 +; GENERIC-NEXT: kshiftlb $7, %k2, %k1 +; GENERIC-NEXT: korb %k1, %k0, %k0 +; GENERIC-NEXT: vpmovm2w %k0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: vmov_test18: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] -; SKX-NEXT: kshiftrw $8, %k1, %k2 # sched: [3:1.00] -; SKX-NEXT: kshiftrw $9, %k1, %k1 # sched: [3:1.00] -; SKX-NEXT: kshiftrb $6, %k0, %k3 # sched: [3:1.00] -; SKX-NEXT: kxorb %k1, %k3, %k1 # sched: [1:1.00] -; SKX-NEXT: kshiftlb $7, %k1, %k1 # sched: [3:1.00] -; SKX-NEXT: kshiftrb $1, %k1, %k1 # sched: [3:1.00] -; SKX-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kshiftlb $1, %k0, %k0 # sched: [3:1.00] -; SKX-NEXT: kshiftrb $1, %k0, %k0 # sched: [3:1.00] -; SKX-NEXT: kshiftlb $7, %k2, %k1 # sched: [3:1.00] -; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: kmovd %esi, %k1 +; SKX-NEXT: kshiftrw $8, %k1, %k2 +; SKX-NEXT: kshiftrw $9, %k1, %k1 +; SKX-NEXT: kshiftrb $6, %k0, %k3 +; SKX-NEXT: kxorb %k1, %k3, %k1 +; SKX-NEXT: kshiftlb $7, %k1, %k1 +; SKX-NEXT: kshiftrb $1, %k1, %k1 +; SKX-NEXT: kxorb %k1, %k0, %k0 +; SKX-NEXT: kshiftlb $1, %k0, %k0 +; SKX-NEXT: kshiftrb $1, %k0, %k0 +; SKX-NEXT: kshiftlb $7, %k2, %k1 +; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: vpmovm2w %k0, %xmm0 +; SKX-NEXT: retq %b = bitcast i8 %a to <8 x i1> %b1 = bitcast i16 %y to <16 x i1> %el1 = extractelement <16 x i1>%b1, i32 8 @@ -7344,17 +7338,17 @@ define <32 x i16> @vmov_test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone { ; GENERIC-LABEL: vmov_test21: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 +; GENERIC-NEXT: vpmovb2m %ymm1, %k1 +; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: vmov_test21: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00] -; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 +; SKX-NEXT: vpmovb2m %ymm1, %k1 +; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer ret <32 x i16> %ret } @@ -7362,17 +7356,17 @@ define void @vmov_test22(<4 x i1> %a, <4 x i1>* %addr) { ; GENERIC-LABEL: vmov_test22: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovd2m %xmm0, %k0 +; GENERIC-NEXT: kmovb %k0, (%rdi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: vmov_test22: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: vpmovd2m %xmm0, %k0 +; SKX-NEXT: kmovb %k0, (%rdi) +; SKX-NEXT: retq store <4 x i1> %a, <4 x i1>* %addr ret void } @@ -7380,17 +7374,17 @@ define void @vmov_test23(<2 x i1> %a, <2 x i1>* %addr) { ; GENERIC-LABEL: vmov_test23: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovq2m %xmm0, %k0 +; GENERIC-NEXT: kmovb %k0, (%rdi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: vmov_test23: ; SKX: # %bb.0: -; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 +; SKX-NEXT: vpmovq2m %xmm0, %k0 +; SKX-NEXT: kmovb %k0, (%rdi) +; SKX-NEXT: retq store <2 x i1> %a, <2 x i1>* %addr ret void } @@ -7398,19 +7392,19 @@ define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) { ; GENERIC-LABEL: store_v1i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovb %k0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %edi, %k0 +; GENERIC-NEXT: kxnorw %k0, %k0, %k1 +; GENERIC-NEXT: kxorw %k1, %k0, %k0 +; GENERIC-NEXT: kmovb %k0, (%rsi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: store_v1i1: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] -; SKX-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:1.00] -; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovb %k0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: kxnorw %k0, %k0, %k1 +; SKX-NEXT: kxorw %k1, %k0, %k0 +; SKX-NEXT: kmovb %k0, (%rsi) +; SKX-NEXT: retq %x = xor <1 x i1> %c, store <1 x i1> %x, <1 x i1>* %ptr, align 4 ret void @@ -7419,19 +7413,19 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { ; GENERIC-LABEL: store_v2i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovq2m %xmm0, %k0 +; GENERIC-NEXT: knotw %k0, %k0 +; GENERIC-NEXT: kmovb %k0, (%rdi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: store_v2i1: ; SKX: # %bb.0: -; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 +; SKX-NEXT: vpmovq2m %xmm0, %k0 +; SKX-NEXT: knotw %k0, %k0 +; SKX-NEXT: kmovb %k0, (%rdi) +; SKX-NEXT: retq %x = xor <2 x i1> %c, store <2 x i1> %x, <2 x i1>* %ptr, align 4 ret void @@ -7440,19 +7434,19 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { ; GENERIC-LABEL: store_v4i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovd2m %xmm0, %k0 +; GENERIC-NEXT: knotw %k0, %k0 +; GENERIC-NEXT: kmovb %k0, (%rdi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: store_v4i1: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpslld $31, %xmm0, %xmm0 +; SKX-NEXT: vpmovd2m %xmm0, %k0 +; SKX-NEXT: knotw %k0, %k0 +; SKX-NEXT: kmovb %k0, (%rdi) +; SKX-NEXT: retq %x = xor <4 x i1> %c, store <4 x i1> %x, <4 x i1>* %ptr, align 4 ret void @@ -7461,19 +7455,19 @@ define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) { ; GENERIC-LABEL: store_v8i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k0 +; GENERIC-NEXT: knotb %k0, %k0 +; GENERIC-NEXT: kmovb %k0, (%rdi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: store_v8i1: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: knotb %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k0 +; SKX-NEXT: knotb %k0, %k0 +; SKX-NEXT: kmovb %k0, (%rdi) +; SKX-NEXT: retq %x = xor <8 x i1> %c, store <8 x i1> %x, <8 x i1>* %ptr, align 4 ret void @@ -7482,19 +7476,19 @@ define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) { ; GENERIC-LABEL: store_v16i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovb2m %xmm0, %k0 +; GENERIC-NEXT: knotw %k0, %k0 +; GENERIC-NEXT: kmovw %k0, (%rdi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: store_v16i1: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 +; SKX-NEXT: vpmovb2m %xmm0, %k0 +; SKX-NEXT: knotw %k0, %k0 +; SKX-NEXT: kmovw %k0, (%rdi) +; SKX-NEXT: retq %x = xor <16 x i1> %c, store <16 x i1> %x, <16 x i1>* %ptr, align 4 ret void @@ -7516,16 +7510,16 @@ define void @f1(i32 %c) { ; GENERIC-LABEL: f1: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50] -; GENERIC-NEXT: xorl $1, %edi # sched: [1:0.33] -; GENERIC-NEXT: movb %dil, {{.*}}(%rip) # sched: [1:1.00] +; GENERIC-NEXT: movzbl {{.*}}(%rip), %edi +; GENERIC-NEXT: xorl $1, %edi +; GENERIC-NEXT: movb %dil, {{.*}}(%rip) ; GENERIC-NEXT: jmp f2 # TAILCALL ; ; SKX-LABEL: f1: ; SKX: # %bb.0: # %entry -; SKX-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50] -; SKX-NEXT: xorl $1, %edi # sched: [1:0.25] -; SKX-NEXT: movb %dil, {{.*}}(%rip) # sched: [1:1.00] +; SKX-NEXT: movzbl {{.*}}(%rip), %edi +; SKX-NEXT: xorl $1, %edi +; SKX-NEXT: movb %dil, {{.*}}(%rip) ; SKX-NEXT: jmp f2 # TAILCALL entry: %.b1 = load i1, i1* @f1.v, align 4 @@ -7541,15 +7535,15 @@ define void @store_i16_i1(i16 %x, i1 *%y) { ; GENERIC-LABEL: store_i16_i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33] -; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: andl $1, %edi +; GENERIC-NEXT: movb %dil, (%rsi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: store_i16_i1: ; SKX: # %bb.0: -; SKX-NEXT: andl $1, %edi # sched: [1:0.25] -; SKX-NEXT: movb %dil, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: andl $1, %edi +; SKX-NEXT: movb %dil, (%rsi) +; SKX-NEXT: retq %c = trunc i16 %x to i1 store i1 %c, i1* %y ret void @@ -7558,15 +7552,15 @@ define void @store_i8_i1(i8 %x, i1 *%y) { ; GENERIC-LABEL: store_i8_i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33] -; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: andl $1, %edi +; GENERIC-NEXT: movb %dil, (%rsi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: store_i8_i1: ; SKX: # %bb.0: -; SKX-NEXT: andl $1, %edi # sched: [1:0.25] -; SKX-NEXT: movb %dil, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: andl $1, %edi +; SKX-NEXT: movb %dil, (%rsi) +; SKX-NEXT: retq %c = trunc i8 %x to i1 store i1 %c, i1* %y ret void @@ -7576,18 +7570,16 @@ ; GENERIC-LABEL: test_build_vec_v32i1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: movl $1497715861, %eax # imm = 0x59455495 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd %eax, %k1 +; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_build_vec_v32i1: ; SKX: # %bb.0: ; SKX-NEXT: movl $1497715861, %eax # imm = 0x59455495 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] -; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd %eax, %k1 +; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq %ret = select <32 x i1> , <32 x i16> %x, <32 x i16> zeroinitializer ret <32 x i16> %ret } @@ -7595,13 +7587,13 @@ define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) { ; GENERIC-LABEL: test_build_vec_v64i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_build_vec_v64i1: ; SKX: # %bb.0: -; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero +; SKX-NEXT: retq %ret = select <64 x i1> , <64 x i8> %x, <64 x i8> zeroinitializer ret <64 x i8> %ret } @@ -7609,37 +7601,37 @@ define void @ktest_1(<8 x double> %in, double * %base) { ; GENERIC-LABEL: ktest_1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovupd (%rdi), %zmm1 # sched: [7:0.50] -; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [7:0.50] -; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00] -; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00] +; GENERIC-NEXT: vmovupd (%rdi), %zmm1 +; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} +; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} +; GENERIC-NEXT: kortestb %k0, %k0 +; GENERIC-NEXT: je .LBB410_2 ; GENERIC-NEXT: # %bb.1: # %L1 -; GENERIC-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd %zmm0, (%rdi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; GENERIC-NEXT: .LBB410_2: # %L2 -; GENERIC-NEXT: vmovapd %zmm0, 8(%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd %zmm0, 8(%rdi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: ktest_1: ; SKX: # %bb.0: -; SKX-NEXT: vmovupd (%rdi), %zmm1 # sched: [8:0.50] -; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00] -; SKX-NEXT: kortestb %k0, %k0 # sched: [3:1.00] -; SKX-NEXT: je .LBB410_2 # sched: [1:0.50] +; SKX-NEXT: vmovupd (%rdi), %zmm1 +; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} +; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} +; SKX-NEXT: kortestb %k0, %k0 +; SKX-NEXT: je .LBB410_2 ; SKX-NEXT: # %bb.1: # %L1 -; SKX-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd %zmm0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq ; SKX-NEXT: .LBB410_2: # %L2 -; SKX-NEXT: vmovapd %zmm0, 8(%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd %zmm0, 8(%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %addr1 = getelementptr double, double * %base, i64 0 %addr2 = getelementptr double, double * %base, i64 1 @@ -7671,53 +7663,53 @@ ; ; GENERIC-LABEL: ktest_2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovups (%rdi), %zmm2 # sched: [7:0.50] -; GENERIC-NEXT: vmovups 64(%rdi), %zmm3 # sched: [7:0.50] -; GENERIC-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00] -; GENERIC-NEXT: kunpckwd %k1, %k2, %k0 # sched: [1:1.00] -; GENERIC-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [7:0.50] -; GENERIC-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [7:0.50] -; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00] -; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] -; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 # sched: [1:1.00] -; GENERIC-NEXT: kortestd %k1, %k0 # sched: [1:0.33] -; GENERIC-NEXT: je .LBB411_2 # sched: [1:1.00] +; GENERIC-NEXT: vmovups (%rdi), %zmm2 +; GENERIC-NEXT: vmovups 64(%rdi), %zmm3 +; GENERIC-NEXT: vcmpltps %zmm0, %zmm2, %k1 +; GENERIC-NEXT: vcmpltps %zmm1, %zmm3, %k2 +; GENERIC-NEXT: kunpckwd %k1, %k2, %k0 +; GENERIC-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} +; GENERIC-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} +; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 +; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 +; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 +; GENERIC-NEXT: kortestd %k1, %k0 +; GENERIC-NEXT: je .LBB411_2 ; GENERIC-NEXT: # %bb.1: # %L1 -; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm1, 64(%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps %zmm0, (%rdi) +; GENERIC-NEXT: vmovaps %zmm1, 64(%rdi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; GENERIC-NEXT: .LBB411_2: # %L2 -; GENERIC-NEXT: vmovaps %zmm0, 4(%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm1, 68(%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps %zmm0, 4(%rdi) +; GENERIC-NEXT: vmovaps %zmm1, 68(%rdi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: ktest_2: ; SKX: # %bb.0: -; SKX-NEXT: vmovups (%rdi), %zmm2 # sched: [8:0.50] -; SKX-NEXT: vmovups 64(%rdi), %zmm3 # sched: [8:0.50] -; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00] -; SKX-NEXT: kunpckwd %k1, %k2, %k0 # sched: [3:1.00] -; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [8:0.50] -; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] -; SKX-NEXT: kunpckwd %k1, %k2, %k1 # sched: [3:1.00] -; SKX-NEXT: kortestd %k1, %k0 # sched: [3:1.00] -; SKX-NEXT: je .LBB411_2 # sched: [1:0.50] +; SKX-NEXT: vmovups (%rdi), %zmm2 +; SKX-NEXT: vmovups 64(%rdi), %zmm3 +; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1 +; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2 +; SKX-NEXT: kunpckwd %k1, %k2, %k0 +; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} +; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} +; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1 +; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 +; SKX-NEXT: kunpckwd %k1, %k2, %k1 +; SKX-NEXT: kortestd %k1, %k0 +; SKX-NEXT: je .LBB411_2 ; SKX-NEXT: # %bb.1: # %L1 -; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vmovaps %zmm1, 64(%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps %zmm0, (%rdi) +; SKX-NEXT: vmovaps %zmm1, 64(%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq ; SKX-NEXT: .LBB411_2: # %L2 -; SKX-NEXT: vmovaps %zmm0, 4(%rdi) # sched: [1:1.00] -; SKX-NEXT: vmovaps %zmm1, 68(%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps %zmm0, 4(%rdi) +; SKX-NEXT: vmovaps %zmm1, 68(%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %addr1 = getelementptr float, float * %base, i64 0 %addr2 = getelementptr float, float * %base, i64 1 @@ -7748,15 +7740,15 @@ define <8 x i64> @load_8i1(<8 x i1>* %a) { ; GENERIC-LABEL: load_8i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] -; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovb (%rdi), %k0 +; GENERIC-NEXT: vpmovm2q %k0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: load_8i1: ; SKX: # %bb.0: -; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] -; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovb (%rdi), %k0 +; SKX-NEXT: vpmovm2q %k0, %zmm0 +; SKX-NEXT: retq %b = load <8 x i1>, <8 x i1>* %a %c = sext <8 x i1> %b to <8 x i64> ret <8 x i64> %c @@ -7765,15 +7757,15 @@ define <16 x i32> @load_16i1(<16 x i1>* %a) { ; GENERIC-LABEL: load_16i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] -; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovw (%rdi), %k0 +; GENERIC-NEXT: vpmovm2d %k0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: load_16i1: ; SKX: # %bb.0: -; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00] -; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovw (%rdi), %k0 +; SKX-NEXT: vpmovm2d %k0, %zmm0 +; SKX-NEXT: retq %b = load <16 x i1>, <16 x i1>* %a %c = sext <16 x i1> %b to <16 x i32> ret <16 x i32> %c @@ -7782,15 +7774,15 @@ define <2 x i16> @load_2i1(<2 x i1>* %a) { ; GENERIC-LABEL: load_2i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] -; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovb (%rdi), %k0 +; GENERIC-NEXT: vpmovm2q %k0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: load_2i1: ; SKX: # %bb.0: -; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] -; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovb (%rdi), %k0 +; SKX-NEXT: vpmovm2q %k0, %xmm0 +; SKX-NEXT: retq %b = load <2 x i1>, <2 x i1>* %a %c = sext <2 x i1> %b to <2 x i16> ret <2 x i16> %c @@ -7799,15 +7791,15 @@ define <4 x i16> @load_4i1(<4 x i1>* %a) { ; GENERIC-LABEL: load_4i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] -; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovb (%rdi), %k0 +; GENERIC-NEXT: vpmovm2d %k0, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: load_4i1: ; SKX: # %bb.0: -; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] -; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovb (%rdi), %k0 +; SKX-NEXT: vpmovm2d %k0, %xmm0 +; SKX-NEXT: retq %b = load <4 x i1>, <4 x i1>* %a %c = sext <4 x i1> %b to <4 x i16> ret <4 x i16> %c @@ -7816,15 +7808,15 @@ define <32 x i16> @load_32i1(<32 x i1>* %a) { ; GENERIC-LABEL: load_32i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd (%rdi), %k0 # sched: [5:0.50] -; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovd (%rdi), %k0 +; GENERIC-NEXT: vpmovm2w %k0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: load_32i1: ; SKX: # %bb.0: -; SKX-NEXT: kmovd (%rdi), %k0 # sched: [7:1.00] -; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovd (%rdi), %k0 +; SKX-NEXT: vpmovm2w %k0, %zmm0 +; SKX-NEXT: retq %b = load <32 x i1>, <32 x i1>* %a %c = sext <32 x i1> %b to <32 x i16> ret <32 x i16> %c @@ -7833,15 +7825,15 @@ define <64 x i8> @load_64i1(<64 x i1>* %a) { ; GENERIC-LABEL: load_64i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovq (%rdi), %k0 # sched: [5:0.50] -; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: kmovq (%rdi), %k0 +; GENERIC-NEXT: vpmovm2b %k0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: load_64i1: ; SKX: # %bb.0: -; SKX-NEXT: kmovq (%rdi), %k0 # sched: [7:1.00] -; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: kmovq (%rdi), %k0 +; SKX-NEXT: vpmovm2b %k0, %zmm0 +; SKX-NEXT: retq %b = load <64 x i1>, <64 x i1>* %a %c = sext <64 x i1> %b to <64 x i8> ret <64 x i8> %c @@ -7850,17 +7842,17 @@ define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) { ; GENERIC-LABEL: store_8i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k0 +; GENERIC-NEXT: kmovb %k0, (%rdi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: store_8i1: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k0 +; SKX-NEXT: kmovb %k0, (%rdi) +; SKX-NEXT: retq store <8 x i1> %v, <8 x i1>* %a ret void } @@ -7868,17 +7860,17 @@ define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) { ; GENERIC-LABEL: store_8i1_1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovw2m %xmm0, %k0 +; GENERIC-NEXT: kmovb %k0, (%rdi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: store_8i1_1: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: vpmovw2m %xmm0, %k0 +; SKX-NEXT: kmovb %k0, (%rdi) +; SKX-NEXT: retq %v1 = trunc <8 x i16> %v to <8 x i1> store <8 x i1> %v1, <8 x i1>* %a ret void @@ -7887,17 +7879,17 @@ define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) { ; GENERIC-LABEL: store_16i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 +; GENERIC-NEXT: vpmovb2m %xmm0, %k0 +; GENERIC-NEXT: kmovw %k0, (%rdi) +; GENERIC-NEXT: retq ; ; SKX-LABEL: store_16i1: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 +; SKX-NEXT: vpmovb2m %xmm0, %k0 +; SKX-NEXT: kmovw %k0, (%rdi) +; SKX-NEXT: retq store <16 x i1> %v, <16 x i1>* %a ret void } @@ -7905,19 +7897,19 @@ define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) { ; GENERIC-LABEL: store_32i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 +; GENERIC-NEXT: vpmovb2m %ymm0, %k0 +; GENERIC-NEXT: kmovd %k0, (%rdi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: store_32i1: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 +; SKX-NEXT: vpmovb2m %ymm0, %k0 +; SKX-NEXT: kmovd %k0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq store <32 x i1> %v, <32 x i1>* %a ret void } @@ -7925,19 +7917,19 @@ define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) { ; GENERIC-LABEL: store_32i1_1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $15, %zmm0, %zmm0 +; GENERIC-NEXT: vpmovw2m %zmm0, %k0 +; GENERIC-NEXT: kmovd %k0, (%rdi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: store_32i1_1: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00] -; SKX-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 +; SKX-NEXT: vpmovw2m %zmm0, %k0 +; SKX-NEXT: kmovd %k0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %v1 = trunc <32 x i16> %v to <32 x i1> store <32 x i1> %v1, <32 x i1>* %a ret void @@ -7948,19 +7940,19 @@ ; ; GENERIC-LABEL: store_64i1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpsllw $7, %zmm0, %zmm0 +; GENERIC-NEXT: vpmovb2m %zmm0, %k0 +; GENERIC-NEXT: kmovq %k0, (%rdi) +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: store_64i1: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00] -; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 +; SKX-NEXT: vpmovb2m %zmm0, %k0 +; SKX-NEXT: kmovq %k0, (%rdi) +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq store <64 x i1> %v, <64 x i1>* %a ret void } @@ -7968,19 +7960,19 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) { ; GENERIC-LABEL: test_bitcast_v8i1_zext: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovb %k0, %eax # sched: [1:0.33] -; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; GENERIC-NEXT: kmovb %k0, %eax +; GENERIC-NEXT: addl %eax, %eax +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_bitcast_v8i1_zext: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: kmovb %k0, %eax # sched: [3:1.00] -; SKX-NEXT: addl %eax, %eax # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; SKX-NEXT: kmovb %k0, %eax +; SKX-NEXT: addl %eax, %eax +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %v1 = icmp eq <16 x i32> %a, zeroinitializer %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> %mask1 = bitcast <8 x i1> %mask to i8 @@ -7992,19 +7984,19 @@ define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) { ; GENERIC-LABEL: test_bitcast_v16i1_zext: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33] -; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; GENERIC-NEXT: kmovw %k0, %eax +; GENERIC-NEXT: addl %eax, %eax +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_bitcast_v16i1_zext: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: kmovw %k0, %eax # sched: [3:1.00] -; SKX-NEXT: addl %eax, %eax # sched: [1:0.25] -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; SKX-NEXT: kmovw %k0, %eax +; SKX-NEXT: addl %eax, %eax +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq %v1 = icmp eq <16 x i32> %a, zeroinitializer %mask1 = bitcast <16 x i1> %v1 to i16 %val = zext i16 %mask1 to i32 @@ -8015,21 +8007,21 @@ define i16 @test_v16i1_add(i16 %x, i16 %y) { ; GENERIC-LABEL: test_v16i1_add: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] +; GENERIC-NEXT: kmovd %edi, %k0 +; GENERIC-NEXT: kmovd %esi, %k1 +; GENERIC-NEXT: kxorw %k1, %k0, %k0 +; GENERIC-NEXT: kmovd %k0, %eax ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_v16i1_add: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] -; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: kmovd %esi, %k1 +; SKX-NEXT: kxorw %k1, %k0, %k0 +; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %m0 = bitcast i16 %x to <16 x i1> %m1 = bitcast i16 %y to <16 x i1> %m2 = add <16 x i1> %m0, %m1 @@ -8040,21 +8032,21 @@ define i16 @test_v16i1_sub(i16 %x, i16 %y) { ; GENERIC-LABEL: test_v16i1_sub: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] +; GENERIC-NEXT: kmovd %edi, %k0 +; GENERIC-NEXT: kmovd %esi, %k1 +; GENERIC-NEXT: kxorw %k1, %k0, %k0 +; GENERIC-NEXT: kmovd %k0, %eax ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_v16i1_sub: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] -; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: kmovd %esi, %k1 +; SKX-NEXT: kxorw %k1, %k0, %k0 +; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %m0 = bitcast i16 %x to <16 x i1> %m1 = bitcast i16 %y to <16 x i1> %m2 = sub <16 x i1> %m0, %m1 @@ -8065,21 +8057,21 @@ define i16 @test_v16i1_mul(i16 %x, i16 %y) { ; GENERIC-LABEL: test_v16i1_mul: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kandw %k1, %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] +; GENERIC-NEXT: kmovd %edi, %k0 +; GENERIC-NEXT: kmovd %esi, %k1 +; GENERIC-NEXT: kandw %k1, %k0, %k0 +; GENERIC-NEXT: kmovd %k0, %eax ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_v16i1_mul: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] -; SKX-NEXT: kandw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: kmovd %esi, %k1 +; SKX-NEXT: kandw %k1, %k0, %k0 +; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %m0 = bitcast i16 %x to <16 x i1> %m1 = bitcast i16 %y to <16 x i1> %m2 = mul <16 x i1> %m0, %m1 @@ -8090,21 +8082,21 @@ define i8 @test_v8i1_add(i8 %x, i8 %y) { ; GENERIC-LABEL: test_v8i1_add: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] +; GENERIC-NEXT: kmovd %edi, %k0 +; GENERIC-NEXT: kmovd %esi, %k1 +; GENERIC-NEXT: kxorb %k1, %k0, %k0 +; GENERIC-NEXT: kmovd %k0, %eax ; GENERIC-NEXT: # kill: def $al killed $al killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_v8i1_add: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] -; SKX-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: kmovd %esi, %k1 +; SKX-NEXT: kxorb %k1, %k0, %k0 +; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: # kill: def $al killed $al killed $eax -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %m0 = bitcast i8 %x to <8 x i1> %m1 = bitcast i8 %y to <8 x i1> %m2 = add <8 x i1> %m0, %m1 @@ -8115,21 +8107,21 @@ define i8 @test_v8i1_sub(i8 %x, i8 %y) { ; GENERIC-LABEL: test_v8i1_sub: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] +; GENERIC-NEXT: kmovd %edi, %k0 +; GENERIC-NEXT: kmovd %esi, %k1 +; GENERIC-NEXT: kxorb %k1, %k0, %k0 +; GENERIC-NEXT: kmovd %k0, %eax ; GENERIC-NEXT: # kill: def $al killed $al killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_v8i1_sub: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] -; SKX-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: kmovd %esi, %k1 +; SKX-NEXT: kxorb %k1, %k0, %k0 +; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: # kill: def $al killed $al killed $eax -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %m0 = bitcast i8 %x to <8 x i1> %m1 = bitcast i8 %y to <8 x i1> %m2 = sub <8 x i1> %m0, %m1 @@ -8140,21 +8132,21 @@ define i8 @test_v8i1_mul(i8 %x, i8 %y) { ; GENERIC-LABEL: test_v8i1_mul: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] -; GENERIC-NEXT: kandb %k1, %k0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] +; GENERIC-NEXT: kmovd %edi, %k0 +; GENERIC-NEXT: kmovd %esi, %k1 +; GENERIC-NEXT: kandb %k1, %k0, %k0 +; GENERIC-NEXT: kmovd %k0, %eax ; GENERIC-NEXT: # kill: def $al killed $al killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_v8i1_mul: ; SKX: # %bb.0: -; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] -; SKX-NEXT: kandb %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] +; SKX-NEXT: kmovd %edi, %k0 +; SKX-NEXT: kmovd %esi, %k1 +; SKX-NEXT: kandb %k1, %k0, %k0 +; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: # kill: def $al killed $al killed $eax -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %m0 = bitcast i8 %x to <8 x i1> %m1 = bitcast i8 %y to <8 x i1> %m2 = mul <8 x i1> %m0, %m1 @@ -8165,13 +8157,13 @@ define <16 x i32> @_inreg16xi32(i32 %a) { ; GENERIC-LABEL: _inreg16xi32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: _inreg16xi32: ; SKX: # %bb.0: -; SKX-NEXT: vpbroadcastd %edi, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpbroadcastd %edi, %zmm0 +; SKX-NEXT: retq %b = insertelement <16 x i32> undef, i32 %a, i32 0 %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer ret <16 x i32> %c @@ -8180,13 +8172,13 @@ define <8 x i64> @_inreg8xi64(i64 %a) { ; GENERIC-LABEL: _inreg8xi64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: _inreg8xi64: ; SKX: # %bb.0: -; SKX-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpbroadcastq %rdi, %zmm0 +; SKX-NEXT: retq %b = insertelement <8 x i64> undef, i64 %a, i32 0 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer ret <8 x i64> %c @@ -8195,13 +8187,13 @@ define <16 x float> @_ss16xfloat_v4(<4 x float> %a) { ; GENERIC-LABEL: _ss16xfloat_v4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: _ss16xfloat_v4: ; SKX: # %bb.0: -; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vbroadcastss %xmm0, %zmm0 +; SKX-NEXT: retq %b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer ret <16 x float> %b } @@ -8209,13 +8201,13 @@ define <16 x float> @_inreg16xfloat(float %a) { ; GENERIC-LABEL: _inreg16xfloat: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: _inreg16xfloat: ; SKX: # %bb.0: -; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vbroadcastss %xmm0, %zmm0 +; SKX-NEXT: retq %b = insertelement <16 x float> undef, float %a, i32 0 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer ret <16 x float> %c @@ -8224,17 +8216,17 @@ define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) { ; GENERIC-LABEL: _ss16xfloat_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: _ss16xfloat_mask: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %b = insertelement <16 x float> undef, float %a, i32 0 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer @@ -8245,15 +8237,15 @@ define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) { ; GENERIC-LABEL: _ss16xfloat_maskz: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: _ss16xfloat_maskz: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = icmp ne <16 x i32> %mask1, zeroinitializer %b = insertelement <16 x float> undef, float %a, i32 0 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer @@ -8264,13 +8256,13 @@ define <16 x float> @_ss16xfloat_load(float* %a.ptr) { ; GENERIC-LABEL: _ss16xfloat_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: _ss16xfloat_load: ; SKX: # %bb.0: -; SKX-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vbroadcastss (%rdi), %zmm0 +; SKX-NEXT: retq %a = load float, float* %a.ptr %b = insertelement <16 x float> undef, float %a, i32 0 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer @@ -8280,15 +8272,15 @@ define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) { ; GENERIC-LABEL: _ss16xfloat_mask_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: _ss16xfloat_mask_load: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} +; SKX-NEXT: retq %a = load float, float* %a.ptr %mask = icmp ne <16 x i32> %mask1, zeroinitializer %b = insertelement <16 x float> undef, float %a, i32 0 @@ -8300,15 +8292,15 @@ define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) { ; GENERIC-LABEL: _ss16xfloat_maskz_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: _ss16xfloat_maskz_load: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 +; SKX-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %a = load float, float* %a.ptr %mask = icmp ne <16 x i32> %mask1, zeroinitializer %b = insertelement <16 x float> undef, float %a, i32 0 @@ -8320,13 +8312,13 @@ define <8 x double> @_inreg8xdouble(double %a) { ; GENERIC-LABEL: _inreg8xdouble: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: _inreg8xdouble: ; SKX: # %bb.0: -; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 +; SKX-NEXT: retq %b = insertelement <8 x double> undef, double %a, i32 0 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer ret <8 x double> %c @@ -8335,17 +8327,17 @@ define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) { ; GENERIC-LABEL: _sd8xdouble_mask: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: _sd8xdouble_mask: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %mask = icmp ne <8 x i32> %mask1, zeroinitializer %b = insertelement <8 x double> undef, double %a, i32 0 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer @@ -8356,15 +8348,15 @@ define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) { ; GENERIC-LABEL: _sd8xdouble_maskz: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: _sd8xdouble_maskz: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq %mask = icmp ne <8 x i32> %mask1, zeroinitializer %b = insertelement <8 x double> undef, double %a, i32 0 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer @@ -8375,13 +8367,13 @@ define <8 x double> @_sd8xdouble_load(double* %a.ptr) { ; GENERIC-LABEL: _sd8xdouble_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: _sd8xdouble_load: ; SKX: # %bb.0: -; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 +; SKX-NEXT: retq %a = load double, double* %a.ptr %b = insertelement <8 x double> undef, double %a, i32 0 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer @@ -8391,15 +8383,15 @@ define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) { ; GENERIC-LABEL: _sd8xdouble_mask_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: _sd8xdouble_mask_load: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} +; SKX-NEXT: retq %a = load double, double* %a.ptr %mask = icmp ne <8 x i32> %mask1, zeroinitializer %b = insertelement <8 x double> undef, double %a, i32 0 @@ -8411,15 +8403,15 @@ define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) { ; GENERIC-LABEL: _sd8xdouble_maskz_load: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestmd %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: _sd8xdouble_maskz_load: ; SKX: # %bb.0: -; SKX-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestmd %ymm0, %ymm0, %k1 +; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} +; SKX-NEXT: retq %a = load double, double* %a.ptr %mask = icmp ne <8 x i32> %mask1, zeroinitializer %b = insertelement <8 x double> undef, double %a, i32 0 @@ -8431,13 +8423,13 @@ define <16 x i32> @_xmm16xi32(<16 x i32> %a) { ; GENERIC-LABEL: _xmm16xi32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: _xmm16xi32: ; SKX: # %bb.0: -; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vbroadcastss %xmm0, %zmm0 +; SKX-NEXT: retq %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer ret <16 x i32> %b } @@ -8445,13 +8437,13 @@ define <16 x float> @_xmm16xfloat(<16 x float> %a) { ; GENERIC-LABEL: _xmm16xfloat: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: _xmm16xfloat: ; SKX: # %bb.0: -; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vbroadcastss %xmm0, %zmm0 +; SKX-NEXT: retq %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer ret <16 x float> %b } @@ -8459,21 +8451,21 @@ define <16 x i32> @test_vbroadcast() { ; GENERIC-LABEL: test_vbroadcast: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [0:0.25] -; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00] -; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] -; GENERIC-NEXT: knotw %k0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 +; GENERIC-NEXT: vpmovm2d %k0, %zmm0 +; GENERIC-NEXT: knotw %k0, %k1 +; GENERIC-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_vbroadcast: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: knotw %k0, %k1 # sched: [1:1.00] -; SKX-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; SKX-NEXT: vcmpunordps %zmm0, %zmm0, %k0 +; SKX-NEXT: vpmovm2d %k0, %zmm0 +; SKX-NEXT: knotw %k0, %k1 +; SKX-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq entry: %0 = sext <16 x i1> zeroinitializer to <16 x i32> %1 = fcmp uno <16 x float> undef, zeroinitializer @@ -8487,13 +8479,13 @@ define <8 x double> @test_set1_pd(double %d) #2 { ; GENERIC-LABEL: test_set1_pd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_set1_pd: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 +; SKX-NEXT: retq entry: %vecinit.i = insertelement <8 x double> undef, double %d, i32 0 %vecinit1.i = insertelement <8 x double> %vecinit.i, double %d, i32 1 @@ -8509,13 +8501,13 @@ define <8 x i64> @test_set1_epi64(i64 %d) #2 { ; GENERIC-LABEL: test_set1_epi64: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_set1_epi64: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpbroadcastq %rdi, %zmm0 +; SKX-NEXT: retq entry: %vecinit.i = insertelement <8 x i64> undef, i64 %d, i32 0 %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %d, i32 1 @@ -8531,13 +8523,13 @@ define <16 x float> @test_set1_ps(float %f) #2 { ; GENERIC-LABEL: test_set1_ps: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_set1_ps: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vbroadcastss %xmm0, %zmm0 +; SKX-NEXT: retq entry: %vecinit.i = insertelement <16 x float> undef, float %f, i32 0 %vecinit1.i = insertelement <16 x float> %vecinit.i, float %f, i32 1 @@ -8561,13 +8553,13 @@ define <16 x i32> @test_set1_epi32(i32 %f) #2 { ; GENERIC-LABEL: test_set1_epi32: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_set1_epi32: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vpbroadcastd %edi, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpbroadcastd %edi, %zmm0 +; SKX-NEXT: retq entry: %vecinit.i = insertelement <16 x i32> undef, i32 %f, i32 0 %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %f, i32 1 @@ -8593,13 +8585,13 @@ define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) { ; GENERIC-LABEL: test_mm512_broadcastsd_pd: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_mm512_broadcastsd_pd: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 +; SKX-NEXT: retq entry: %0 = extractelement <2 x double> %a, i32 0 %vecinit.i = insertelement <8 x double> undef, double %0, i32 0 @@ -8616,13 +8608,13 @@ define <16 x float> @suff_test1(<8 x float>%a) { ; GENERIC-LABEL: suff_test1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: suff_test1: ; SKX: # %bb.0: -; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vbroadcastss %xmm0, %zmm0 +; SKX-NEXT: retq %res = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> zeroinitializer ret <16 x float>%res } @@ -8630,13 +8622,13 @@ define <8 x double> @suff_test2(<4 x double>%a) { ; GENERIC-LABEL: suff_test2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: suff_test2: ; SKX: # %bb.0: -; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 +; SKX-NEXT: retq %res = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> zeroinitializer ret <8 x double>%res } @@ -8644,13 +8636,13 @@ define <64 x i8> @_invec32xi8(<32 x i8>%a) { ; GENERIC-LABEL: _invec32xi8: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastb %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpbroadcastb %xmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: _invec32xi8: ; SKX: # %bb.0: -; SKX-NEXT: vpbroadcastb %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpbroadcastb %xmm0, %zmm0 +; SKX-NEXT: retq %res = shufflevector <32 x i8> %a, <32 x i8> undef, <64 x i32> zeroinitializer ret <64 x i8>%res } @@ -8658,13 +8650,13 @@ define <32 x i16> @_invec16xi16(<16 x i16>%a) { ; GENERIC-LABEL: _invec16xi16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpbroadcastw %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpbroadcastw %xmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: _invec16xi16: ; SKX: # %bb.0: -; SKX-NEXT: vpbroadcastw %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpbroadcastw %xmm0, %zmm0 +; SKX-NEXT: retq %res = shufflevector <16 x i16> %a, <16 x i16> undef, <32 x i32> zeroinitializer ret <32 x i16>%res } @@ -8672,13 +8664,13 @@ define <16 x i32> @_invec8xi32(<8 x i32>%a) { ; GENERIC-LABEL: _invec8xi32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: _invec8xi32: ; SKX: # %bb.0: -; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vbroadcastss %xmm0, %zmm0 +; SKX-NEXT: retq %res = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> zeroinitializer ret <16 x i32>%res } @@ -8686,13 +8678,13 @@ define <8 x i64> @_invec4xi64(<4 x i64>%a) { ; GENERIC-LABEL: _invec4xi64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: _invec4xi64: ; SKX: # %bb.0: -; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 +; SKX-NEXT: retq %res = shufflevector <4 x i64> %a, <4 x i64> undef, <8 x i32> zeroinitializer ret <8 x i64>%res } @@ -8701,27 +8693,27 @@ define <16 x float> @broadcast_ss_spill(float %x) { ; GENERIC-LABEL: broadcast_ss_spill: ; GENERIC: # %bb.0: -; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33] +; GENERIC-NEXT: subq $24, %rsp ; GENERIC-NEXT: .cfi_def_cfa_offset 32 -; GENERIC-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] +; GENERIC-NEXT: vaddss %xmm0, %xmm0, %xmm0 +; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; GENERIC-NEXT: callq func_f32 -; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00] -; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33] +; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload +; GENERIC-NEXT: addq $24, %rsp ; GENERIC-NEXT: .cfi_def_cfa_offset 8 -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: broadcast_ss_spill: ; SKX: # %bb.0: -; SKX-NEXT: subq $24, %rsp # sched: [1:0.25] +; SKX-NEXT: subq $24, %rsp ; SKX-NEXT: .cfi_def_cfa_offset 32 -; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] +; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 +; SKX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; SKX-NEXT: callq func_f32 -; SKX-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50] -; SKX-NEXT: addq $24, %rsp # sched: [1:0.25] +; SKX-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload +; SKX-NEXT: addq $24, %rsp ; SKX-NEXT: .cfi_def_cfa_offset 8 -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %a = fadd float %x, %x call void @func_f32(float %a) %b = insertelement <16 x float> undef, float %a, i32 0 @@ -8733,27 +8725,27 @@ define <8 x double> @broadcast_sd_spill(double %x) { ; GENERIC-LABEL: broadcast_sd_spill: ; GENERIC: # %bb.0: -; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33] +; GENERIC-NEXT: subq $24, %rsp ; GENERIC-NEXT: .cfi_def_cfa_offset 32 -; GENERIC-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] +; GENERIC-NEXT: vaddsd %xmm0, %xmm0, %xmm0 +; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; GENERIC-NEXT: callq func_f64 -; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00] -; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33] +; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload +; GENERIC-NEXT: addq $24, %rsp ; GENERIC-NEXT: .cfi_def_cfa_offset 8 -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: retq ; ; SKX-LABEL: broadcast_sd_spill: ; SKX: # %bb.0: -; SKX-NEXT: subq $24, %rsp # sched: [1:0.25] +; SKX-NEXT: subq $24, %rsp ; SKX-NEXT: .cfi_def_cfa_offset 32 -; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] +; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 +; SKX-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; SKX-NEXT: callq func_f64 -; SKX-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50] -; SKX-NEXT: addq $24, %rsp # sched: [1:0.25] +; SKX-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload +; SKX-NEXT: addq $24, %rsp ; SKX-NEXT: .cfi_def_cfa_offset 8 -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: retq %a = fadd double %x, %x call void @func_f64(double %a) %b = insertelement <8 x double> undef, double %a, i32 0 Index: test/CodeGen/X86/avx512-shuffle-schedule.ll =================================================================== --- test/CodeGen/X86/avx512-shuffle-schedule.ll +++ test/CodeGen/X86/avx512-shuffle-schedule.ll @@ -1,40 +1,38 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX - -; This test is an assembly of avx512 shuffling instructions to check their scheduling +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=SKX define <16 x i16> @test_16xi16_perm_mask0(<16 x i16> %vec) { ; GENERIC-LABEL: test_16xi16_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] -; GENERIC-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] +; GENERIC-NEXT: vpermw %ymm0, %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi16_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] -; SKX-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [6:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] +; SKX-NEXT: vpermw %ymm0, %ymm1, %ymm0 +; SKX-NEXT: retq %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] +; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 @@ -44,17 +42,17 @@ define <16 x i16> @test_masked_z_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -63,19 +61,19 @@ define <16 x i16> @test_masked_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] +; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 @@ -85,17 +83,17 @@ define <16 x i16> @test_masked_z_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -104,19 +102,19 @@ define <16 x i16> @test_masked_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] +; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 @@ -126,17 +124,17 @@ define <16 x i16> @test_masked_z_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -145,34 +143,34 @@ define <16 x i16> @test_16xi16_perm_mask3(<16 x i16> %vec) { ; GENERIC-LABEL: test_16xi16_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] -; GENERIC-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] +; GENERIC-NEXT: vpermw %ymm0, %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi16_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] -; SKX-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [6:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] +; SKX-NEXT: vpermw %ymm0, %ymm1, %ymm0 +; SKX-NEXT: retq %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] +; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 @@ -182,17 +180,17 @@ define <16 x i16> @test_masked_z_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -201,15 +199,15 @@ define <16 x i16> @test_16xi16_perm_mem_mask0(<16 x i16>* %vp) { ; GENERIC-LABEL: test_16xi16_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] -; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] +; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi16_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] -; SKX-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] +; SKX-NEXT: vpermw (%rdi), %ymm0, %ymm0 +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res @@ -217,17 +215,17 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -238,17 +236,17 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] +; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -259,17 +257,17 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -280,17 +278,17 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] +; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -301,17 +299,17 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -322,17 +320,17 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] +; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -343,15 +341,15 @@ define <16 x i16> @test_16xi16_perm_mem_mask3(<16 x i16>* %vp) { ; GENERIC-LABEL: test_16xi16_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] -; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] +; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi16_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] -; SKX-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] +; SKX-NEXT: vpermw (%rdi), %ymm0, %ymm0 +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res @@ -359,17 +357,17 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -380,17 +378,17 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] -; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] +; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -401,34 +399,34 @@ define <32 x i16> @test_32xi16_perm_mask0(<32 x i16> %vec) { ; GENERIC-LABEL: test_32xi16_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50] -; GENERIC-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] +; GENERIC-NEXT: vpermw %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_32xi16_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50] -; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [6:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] +; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res } define <32 x i16> @test_masked_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] +; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 @@ -438,17 +436,17 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -457,19 +455,19 @@ define <32 x i16> @test_masked_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] +; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 @@ -479,17 +477,17 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -498,19 +496,19 @@ define <32 x i16> @test_masked_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] +; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 @@ -520,17 +518,17 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -539,34 +537,34 @@ define <32 x i16> @test_32xi16_perm_mask3(<32 x i16> %vec) { ; GENERIC-LABEL: test_32xi16_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50] -; GENERIC-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] +; GENERIC-NEXT: vpermw %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_32xi16_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50] -; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [6:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] +; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res } define <32 x i16> @test_masked_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] +; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 @@ -576,17 +574,17 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -595,15 +593,15 @@ define <32 x i16> @test_32xi16_perm_mem_mask0(<32 x i16>* %vp) { ; GENERIC-LABEL: test_32xi16_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50] -; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] +; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_32xi16_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50] -; SKX-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] +; SKX-NEXT: vpermw (%rdi), %zmm0, %zmm0 +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res @@ -611,17 +609,17 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -632,17 +630,17 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] +; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -653,17 +651,17 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -674,17 +672,17 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] +; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -695,17 +693,17 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -716,17 +714,17 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] +; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -737,15 +735,15 @@ define <32 x i16> @test_32xi16_perm_mem_mask3(<32 x i16>* %vp) { ; GENERIC-LABEL: test_32xi16_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50] -; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] +; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_32xi16_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50] -; SKX-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] +; SKX-NEXT: vpermw (%rdi), %zmm0, %zmm0 +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res @@ -753,17 +751,17 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -774,17 +772,17 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50] -; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] +; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -795,34 +793,34 @@ define <8 x i32> @test_8xi32_perm_mask0(<8 x i32> %vec) { ; GENERIC-LABEL: test_8xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] sched: [7:0.50] -; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] +; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] sched: [7:0.50] -; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] +; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; SKX-NEXT: retq %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res } define <8 x i32> @test_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi32_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 @@ -832,17 +830,17 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi32_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -851,19 +849,19 @@ define <8 x i32> @test_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi32_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 @@ -873,17 +871,17 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi32_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -892,19 +890,19 @@ define <8 x i32> @test_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi32_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 @@ -914,17 +912,17 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi32_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -933,34 +931,34 @@ define <8 x i32> @test_8xi32_perm_mask3(<8 x i32> %vec) { ; GENERIC-LABEL: test_8xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] sched: [7:0.50] -; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] +; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] sched: [7:0.50] -; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] +; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; SKX-NEXT: retq %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res } define <8 x i32> @test_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi32_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 @@ -970,17 +968,17 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi32_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -989,15 +987,15 @@ define <8 x i32> @test_8xi32_perm_mem_mask0(<8 x i32>* %vp) { ; GENERIC-LABEL: test_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] +; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50] -; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] +; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res @@ -1005,17 +1003,17 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi32_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -1026,17 +1024,17 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] +; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -1047,17 +1045,17 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi32_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -1068,17 +1066,17 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] +; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -1089,17 +1087,17 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi32_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -1110,17 +1108,17 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] +; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -1131,15 +1129,15 @@ define <8 x i32> @test_8xi32_perm_mem_mask3(<8 x i32>* %vp) { ; GENERIC-LABEL: test_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] +; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50] -; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] +; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res @@ -1147,17 +1145,17 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi32_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -1168,17 +1166,17 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] +; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -1189,34 +1187,34 @@ define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50] -; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] +; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50] -; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] +; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res } define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi32_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 @@ -1226,17 +1224,17 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi32_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -1245,19 +1243,19 @@ define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi32_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 @@ -1267,17 +1265,17 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi32_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -1286,19 +1284,19 @@ define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi32_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 @@ -1308,17 +1306,17 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi32_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -1327,34 +1325,34 @@ define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) { ; GENERIC-LABEL: test_16xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50] -; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] +; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50] -; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] +; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res } define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi32_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 @@ -1364,17 +1362,17 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi32_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -1383,15 +1381,15 @@ define <16 x i32> @test_16xi32_perm_mem_mask0(<16 x i32>* %vp) { ; GENERIC-LABEL: test_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] +; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50] -; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] +; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res @@ -1399,17 +1397,17 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi32_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -1420,17 +1418,17 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -1441,17 +1439,17 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi32_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -1462,17 +1460,17 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -1483,17 +1481,17 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi32_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -1504,17 +1502,17 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -1525,15 +1523,15 @@ define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) { ; GENERIC-LABEL: test_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] +; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50] -; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] +; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res @@ -1541,17 +1539,17 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi32_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -1562,17 +1560,17 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -1583,30 +1581,30 @@ define <4 x i64> @test_4xi64_perm_mask0(<4 x i64> %vec) { ; GENERIC-LABEL: test_4xi64_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,3,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,3,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,3,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,3,1] +; SKX-NEXT: retq %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> ret <4 x i64> %res } define <4 x i64> @test_masked_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi64_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 @@ -1616,15 +1614,15 @@ define <4 x i64> @test_masked_z_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi64_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -1633,17 +1631,17 @@ define <4 x i64> @test_masked_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi64_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 @@ -1653,15 +1651,15 @@ define <4 x i64> @test_masked_z_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi64_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -1670,17 +1668,17 @@ define <4 x i64> @test_masked_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi64_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 @@ -1690,15 +1688,15 @@ define <4 x i64> @test_masked_z_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi64_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -1707,30 +1705,30 @@ define <4 x i64> @test_4xi64_perm_mask3(<4 x i64> %vec) { ; GENERIC-LABEL: test_4xi64_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3] +; SKX-NEXT: retq %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> ret <4 x i64> %res } define <4 x i64> @test_masked_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi64_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2 @@ -1740,15 +1738,15 @@ define <4 x i64> @test_masked_z_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi64_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -1757,13 +1755,13 @@ define <4 x i64> @test_4xi64_perm_mem_mask0(<4 x i64>* %vp) { ; GENERIC-LABEL: test_4xi64_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] +; SKX-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> ret <4 x i64> %res @@ -1771,15 +1769,15 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi64_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] +; SKX-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -1790,15 +1788,15 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] +; SKX-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -1809,15 +1807,15 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi64_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] +; SKX-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -1828,15 +1826,15 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] +; SKX-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -1847,15 +1845,15 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi64_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] +; SKX-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -1866,15 +1864,15 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] +; SKX-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -1885,13 +1883,13 @@ define <4 x i64> @test_4xi64_perm_mem_mask3(<4 x i64>* %vp) { ; GENERIC-LABEL: test_4xi64_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] +; SKX-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> ret <4 x i64> %res @@ -1899,15 +1897,15 @@ define <4 x i64> @test_masked_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi64_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] +; SKX-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -1918,15 +1916,15 @@ define <4 x i64> @test_masked_z_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] +; SKX-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -1937,34 +1935,34 @@ define <8 x i64> @test_8xi64_perm_mask0(<8 x i64> %vec) { ; GENERIC-LABEL: test_8xi64_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6] sched: [7:0.50] -; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6] +; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6] sched: [8:0.50] -; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6] +; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> ret <8 x i64> %res } define <8 x i64> @test_masked_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 @@ -1974,17 +1972,17 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -1993,17 +1991,17 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_imm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 @@ -2013,15 +2011,15 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -2030,19 +2028,19 @@ define <8 x i64> @test_masked_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 @@ -2052,17 +2050,17 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -2071,30 +2069,30 @@ define <8 x i64> @test_8xi64_perm_imm_mask3(<8 x i64> %vec) { ; GENERIC-LABEL: test_8xi64_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,1,3,1,7,5,7,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_perm_imm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,1,3,1,7,5,7,5] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,1,3,1,7,5,7,5] +; SKX-NEXT: retq %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> ret <8 x i64> %res } define <8 x i64> @test_masked_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_imm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 @@ -2104,15 +2102,15 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -2121,19 +2119,19 @@ define <8 x i64> @test_masked_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 @@ -2143,17 +2141,17 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -2162,17 +2160,17 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_imm_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 @@ -2182,15 +2180,15 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -2199,34 +2197,34 @@ define <8 x i64> @test_8xi64_perm_mask6(<8 x i64> %vec) { ; GENERIC-LABEL: test_8xi64_perm_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [5,1,4,4,5,4,2,7] sched: [7:0.50] -; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [5,1,4,4,5,4,2,7] +; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_perm_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [5,1,4,4,5,4,2,7] sched: [8:0.50] -; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [5,1,4,4,5,4,2,7] +; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> ret <8 x i64> %res } define <8 x i64> @test_masked_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 @@ -2236,17 +2234,17 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -2255,17 +2253,17 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_imm_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2 @@ -2275,15 +2273,15 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -2292,15 +2290,15 @@ define <8 x i64> @test_8xi64_perm_mem_mask0(<8 x i64>* %vp) { ; GENERIC-LABEL: test_8xi64_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [7:0.50] -; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] +; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [8:0.50] -; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] +; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> ret <8 x i64> %res @@ -2308,17 +2306,17 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2329,17 +2327,17 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2350,15 +2348,15 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2369,15 +2367,15 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2388,17 +2386,17 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2409,17 +2407,17 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2430,13 +2428,13 @@ define <8 x i64> @test_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp) { ; GENERIC-LABEL: test_8xi64_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_perm_imm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> ret <8 x i64> %res @@ -2444,15 +2442,15 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2463,15 +2461,15 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2482,17 +2480,17 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_mem_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2503,17 +2501,17 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2524,15 +2522,15 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2543,15 +2541,15 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2562,15 +2560,15 @@ define <8 x i64> @test_8xi64_perm_mem_mask6(<8 x i64>* %vp) { ; GENERIC-LABEL: test_8xi64_perm_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [7:0.50] -; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] +; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_perm_mem_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [8:0.50] -; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] +; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> ret <8 x i64> %res @@ -2578,17 +2576,17 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_mem_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2599,17 +2597,17 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2620,15 +2618,15 @@ define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2639,15 +2637,15 @@ define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] +; SKX-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -2658,34 +2656,34 @@ define <8 x float> @test_8xfloat_perm_mask0(<8 x float> %vec) { ; GENERIC-LABEL: test_8xfloat_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4] sched: [7:0.50] -; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4] +; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4] sched: [7:0.50] -; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4] +; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; SKX-NEXT: retq %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> ret <8 x float> %res } define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xfloat_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xfloat_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 @@ -2695,17 +2693,17 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -2714,19 +2712,19 @@ define <8 x float> @test_masked_8xfloat_perm_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xfloat_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xfloat_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 @@ -2736,17 +2734,17 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask1(<8 x float> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] sched: [7:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -2755,19 +2753,19 @@ define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xfloat_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xfloat_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 @@ -2777,17 +2775,17 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -2796,34 +2794,34 @@ define <8 x float> @test_8xfloat_perm_mask3(<8 x float> %vec) { ; GENERIC-LABEL: test_8xfloat_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6] sched: [7:0.50] -; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6] +; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6] sched: [7:0.50] -; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6] +; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; SKX-NEXT: retq %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> ret <8 x float> %res } define <8 x float> @test_masked_8xfloat_perm_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xfloat_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xfloat_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2 @@ -2833,17 +2831,17 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask3(<8 x float> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -2852,15 +2850,15 @@ define <8 x float> @test_8xfloat_perm_mem_mask0(<8 x float>* %vp) { ; GENERIC-LABEL: test_8xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] +; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [7:0.50] -; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] +; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 +; SKX-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> ret <8 x float> %res @@ -2868,17 +2866,17 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -2889,17 +2887,17 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] +; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -2910,17 +2908,17 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -2931,17 +2929,17 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] +; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -2952,17 +2950,17 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -2973,17 +2971,17 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] +; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -2994,15 +2992,15 @@ define <8 x float> @test_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] +; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [7:0.50] -; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] +; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 +; SKX-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> ret <8 x float> %res @@ -3010,17 +3008,17 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xfloat_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -3031,17 +3029,17 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50] -; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] +; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -3052,34 +3050,34 @@ define <16 x float> @test_16xfloat_perm_mask0(<16 x float> %vec) { ; GENERIC-LABEL: test_16xfloat_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [7:0.50] -; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] +; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [8:0.50] -; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] +; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> ret <16 x float> %res } define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xfloat_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xfloat_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 @@ -3089,17 +3087,17 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -3108,19 +3106,19 @@ define <16 x float> @test_masked_16xfloat_perm_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xfloat_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xfloat_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 @@ -3130,17 +3128,17 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask1(<16 x float> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -3149,19 +3147,19 @@ define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xfloat_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xfloat_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 @@ -3171,17 +3169,17 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -3190,34 +3188,34 @@ define <16 x float> @test_16xfloat_perm_mask3(<16 x float> %vec) { ; GENERIC-LABEL: test_16xfloat_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [7:0.50] -; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] +; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [8:0.50] -; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] +; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> ret <16 x float> %res } define <16 x float> @test_masked_16xfloat_perm_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xfloat_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xfloat_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2 @@ -3227,17 +3225,17 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask3(<16 x float> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -3246,15 +3244,15 @@ define <16 x float> @test_16xfloat_perm_mem_mask0(<16 x float>* %vp) { ; GENERIC-LABEL: test_16xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] +; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [8:0.50] -; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] +; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 +; SKX-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> ret <16 x float> %res @@ -3262,17 +3260,17 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -3283,17 +3281,17 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -3304,17 +3302,17 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -3325,17 +3323,17 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -3346,17 +3344,17 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -3367,17 +3365,17 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -3388,15 +3386,15 @@ define <16 x float> @test_16xfloat_perm_mem_mask3(<16 x float>* %vp) { ; GENERIC-LABEL: test_16xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [7:0.50] -; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] +; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [8:0.50] -; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] +; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 +; SKX-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> ret <16 x float> %res @@ -3404,17 +3402,17 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xfloat_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -3425,17 +3423,17 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [8:0.50] -; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -3446,30 +3444,30 @@ define <4 x double> @test_4xdouble_perm_mask0(<4 x double> %vec) { ; GENERIC-LABEL: test_4xdouble_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,2] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,2] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,2] +; SKX-NEXT: retq %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> ret <4 x double> %res } define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xdouble_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] sched: [3:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 @@ -3479,15 +3477,15 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -3496,17 +3494,17 @@ define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xdouble_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] sched: [3:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 @@ -3516,15 +3514,15 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -3533,17 +3531,17 @@ define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xdouble_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] sched: [3:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 @@ -3553,15 +3551,15 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -3570,30 +3568,30 @@ define <4 x double> @test_4xdouble_perm_mask3(<4 x double> %vec) { ; GENERIC-LABEL: test_4xdouble_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,2] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,2] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,2] +; SKX-NEXT: retq %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> ret <4 x double> %res } define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xdouble_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] sched: [3:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2 @@ -3603,15 +3601,15 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -3620,13 +3618,13 @@ define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) { ; GENERIC-LABEL: test_4xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] +; SKX-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> ret <4 x double> %res @@ -3634,15 +3632,15 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] +; SKX-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -3653,15 +3651,15 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] +; SKX-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -3672,15 +3670,15 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] +; SKX-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -3691,15 +3689,15 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] +; SKX-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -3710,15 +3708,15 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] +; SKX-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -3729,15 +3727,15 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] +; SKX-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -3748,13 +3746,13 @@ define <4 x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) { ; GENERIC-LABEL: test_4xdouble_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] +; SKX-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> ret <4 x double> %res @@ -3762,15 +3760,15 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xdouble_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] +; SKX-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -3781,15 +3779,15 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 +; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] +; SKX-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -3800,34 +3798,34 @@ define <8 x double> @test_8xdouble_perm_mask0(<8 x double> %vec) { ; GENERIC-LABEL: test_8xdouble_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4] sched: [7:0.50] -; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4] +; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4] sched: [8:0.50] -; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4] +; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> ret <8 x double> %res } define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 @@ -3837,17 +3835,17 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -3856,17 +3854,17 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] sched: [3:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 @@ -3876,15 +3874,15 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -3893,19 +3891,19 @@ define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 @@ -3915,17 +3913,17 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask2(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -3934,30 +3932,30 @@ define <8 x double> @test_8xdouble_perm_imm_mask3(<8 x double> %vec) { ; GENERIC-LABEL: test_8xdouble_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,3,0,5,7,7,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_perm_imm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,3,0,5,7,7,4] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,3,0,5,7,7,4] +; SKX-NEXT: retq %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> ret <8 x double> %res } define <8 x double> @test_masked_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] sched: [3:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 @@ -3967,15 +3965,15 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -3984,19 +3982,19 @@ define <8 x double> @test_masked_8xdouble_perm_mask4(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 @@ -4006,17 +4004,17 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask4(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -4025,17 +4023,17 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] sched: [3:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 @@ -4045,15 +4043,15 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -4062,34 +4060,34 @@ define <8 x double> @test_8xdouble_perm_mask6(<8 x double> %vec) { ; GENERIC-LABEL: test_8xdouble_perm_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2] sched: [7:0.50] -; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2] +; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_perm_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2] sched: [8:0.50] -; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2] +; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 +; SKX-NEXT: retq %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> ret <8 x double> %res } define <8 x double> @test_masked_8xdouble_perm_mask6(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 @@ -4099,17 +4097,17 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask6(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -4118,17 +4116,17 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] sched: [3:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2 @@ -4138,15 +4136,15 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -4155,15 +4153,15 @@ define <8 x double> @test_8xdouble_perm_mem_mask0(<8 x double>* %vp) { ; GENERIC-LABEL: test_8xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [7:0.50] -; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] +; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [8:0.50] -; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] +; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> ret <8 x double> %res @@ -4171,17 +4169,17 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4192,17 +4190,17 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4213,15 +4211,15 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4232,15 +4230,15 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4251,17 +4249,17 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4272,17 +4270,17 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4293,13 +4291,13 @@ define <8 x double> @test_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp) { ; GENERIC-LABEL: test_8xdouble_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_perm_imm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> ret <8 x double> %res @@ -4307,15 +4305,15 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4326,15 +4324,15 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4345,17 +4343,17 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4366,17 +4364,17 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4387,15 +4385,15 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4406,15 +4404,15 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4425,15 +4423,15 @@ define <8 x double> @test_8xdouble_perm_mem_mask6(<8 x double>* %vp) { ; GENERIC-LABEL: test_8xdouble_perm_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: [7:0.50] -; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] +; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_perm_mem_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: [8:0.50] -; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] +; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> ret <8 x double> %res @@ -4441,17 +4439,17 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_mem_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4462,17 +4460,17 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [7:0.50] -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [8:0.50] -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4483,15 +4481,15 @@ define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4502,15 +4500,15 @@ define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x i64> %mask) { ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 +; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] +; SKX-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -4521,30 +4519,30 @@ define <16 x i8> @test_16xi8_perm_mask0(<16 x i8> %vec) { ; GENERIC-LABEL: test_16xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi8_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] +; SKX-NEXT: retq %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> ret <16 x i8> %res } define <16 x i8> @test_masked_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi8_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 @@ -4554,15 +4552,15 @@ define <16 x i8> @test_masked_z_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi8_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] +; SKX-NEXT: retq %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer @@ -4571,17 +4569,17 @@ define <16 x i8> @test_masked_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi8_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 @@ -4591,15 +4589,15 @@ define <16 x i8> @test_masked_z_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi8_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] +; SKX-NEXT: retq %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer @@ -4608,17 +4606,17 @@ define <16 x i8> @test_masked_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi8_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 @@ -4628,15 +4626,15 @@ define <16 x i8> @test_masked_z_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi8_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] +; SKX-NEXT: retq %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer @@ -4645,30 +4643,30 @@ define <16 x i8> @test_16xi8_perm_mask3(<16 x i8> %vec) { ; GENERIC-LABEL: test_16xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi8_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] +; SKX-NEXT: retq %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> ret <16 x i8> %res } define <16 x i8> @test_masked_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi8_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2 @@ -4678,15 +4676,15 @@ define <16 x i8> @test_masked_z_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi8_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] +; SKX-NEXT: retq %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer @@ -4695,15 +4693,15 @@ define <16 x i8> @test_16xi8_perm_mem_mask0(<16 x i8>* %vp) { ; GENERIC-LABEL: test_16xi8_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %xmm0 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi8_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %xmm0 +; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] +; SKX-NEXT: retq %vec = load <16 x i8>, <16 x i8>* %vp %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> ret <16 x i8> %res @@ -4711,17 +4709,17 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi8_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %xmm2 +; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] +; SKX-NEXT: retq %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer @@ -4732,17 +4730,17 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 +; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %xmm1 +; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] +; SKX-NEXT: retq %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer @@ -4753,17 +4751,17 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi8_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %xmm2 +; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] +; SKX-NEXT: retq %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer @@ -4774,17 +4772,17 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 +; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %xmm1 +; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] +; SKX-NEXT: retq %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer @@ -4795,17 +4793,17 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi8_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %xmm2 +; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] +; SKX-NEXT: retq %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer @@ -4816,17 +4814,17 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 +; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %xmm1 +; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] +; SKX-NEXT: retq %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer @@ -4837,15 +4835,15 @@ define <16 x i8> @test_16xi8_perm_mem_mask3(<16 x i8>* %vp) { ; GENERIC-LABEL: test_16xi8_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %xmm0 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi8_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %xmm0 +; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] +; SKX-NEXT: retq %vec = load <16 x i8>, <16 x i8>* %vp %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> ret <16 x i8> %res @@ -4853,17 +4851,17 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 +; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi8_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50] -; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %xmm2 +; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] +; SKX-NEXT: retq %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer @@ -4874,17 +4872,17 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 +; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] -; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %xmm1 +; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] +; SKX-NEXT: retq %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %cmp = icmp eq <16 x i8> %mask, zeroinitializer @@ -4895,30 +4893,30 @@ define <32 x i8> @test_32xi8_perm_mask0(<32 x i8> %vec) { ; GENERIC-LABEL: test_32xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_32xi8_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] +; SKX-NEXT: retq %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> ret <32 x i8> %res } define <32 x i8> @test_masked_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:0.50] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi8_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 @@ -4928,15 +4926,15 @@ define <32 x i8> @test_masked_z_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi8_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] +; SKX-NEXT: retq %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer @@ -4945,17 +4943,17 @@ define <32 x i8> @test_masked_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:0.50] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi8_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 @@ -4965,15 +4963,15 @@ define <32 x i8> @test_masked_z_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi8_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] +; SKX-NEXT: retq %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer @@ -4982,17 +4980,17 @@ define <32 x i8> @test_masked_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:0.50] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi8_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 @@ -5002,15 +5000,15 @@ define <32 x i8> @test_masked_z_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi8_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] +; SKX-NEXT: retq %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer @@ -5019,30 +5017,30 @@ define <32 x i8> @test_32xi8_perm_mask3(<32 x i8> %vec) { ; GENERIC-LABEL: test_32xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_32xi8_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] +; SKX-NEXT: retq %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> ret <32 x i8> %res } define <32 x i8> @test_masked_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:0.50] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi8_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2 @@ -5052,15 +5050,15 @@ define <32 x i8> @test_masked_z_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi8_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] +; SKX-NEXT: retq %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer @@ -5069,15 +5067,15 @@ define <32 x i8> @test_32xi8_perm_mem_mask0(<32 x i8>* %vp) { ; GENERIC-LABEL: test_32xi8_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_32xi8_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %ymm0 +; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] +; SKX-NEXT: retq %vec = load <32 x i8>, <32 x i8>* %vp %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> ret <32 x i8> %res @@ -5085,17 +5083,17 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi8_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %ymm2 +; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] +; SKX-NEXT: retq %vec = load <32 x i8>, <32 x i8>* %vp %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer @@ -5106,17 +5104,17 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 +; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %ymm1 +; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] +; SKX-NEXT: retq %vec = load <32 x i8>, <32 x i8>* %vp %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer @@ -5127,17 +5125,17 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi8_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %ymm2 +; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] +; SKX-NEXT: retq %vec = load <32 x i8>, <32 x i8>* %vp %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer @@ -5148,17 +5146,17 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 +; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %ymm1 +; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] +; SKX-NEXT: retq %vec = load <32 x i8>, <32 x i8>* %vp %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer @@ -5169,17 +5167,17 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi8_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %ymm2 +; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] +; SKX-NEXT: retq %vec = load <32 x i8>, <32 x i8>* %vp %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer @@ -5190,17 +5188,17 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 +; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %ymm1 +; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] +; SKX-NEXT: retq %vec = load <32 x i8>, <32 x i8>* %vp %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer @@ -5211,15 +5209,15 @@ define <32 x i8> @test_32xi8_perm_mem_mask3(<32 x i8>* %vp) { ; GENERIC-LABEL: test_32xi8_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_32xi8_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %ymm0 +; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] +; SKX-NEXT: retq %vec = load <32 x i8>, <32 x i8>* %vp %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> ret <32 x i8> %res @@ -5227,17 +5225,17 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 +; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi8_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50] -; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %ymm2 +; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] +; SKX-NEXT: retq %vec = load <32 x i8>, <32 x i8>* %vp %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer @@ -5248,17 +5246,17 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 +; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] -; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa (%rdi), %ymm1 +; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] +; SKX-NEXT: retq %vec = load <32 x i8>, <32 x i8>* %vp %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %cmp = icmp eq <32 x i8> %mask, zeroinitializer @@ -5269,30 +5267,30 @@ define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) { ; GENERIC-LABEL: test_64xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_64xi8_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] +; SKX-NEXT: retq %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> ret <64 x i8> %res } define <64 x i8> @test_masked_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:0.50] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_64xi8_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 @@ -5302,15 +5300,15 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] +; SKX-NEXT: retq %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer @@ -5319,17 +5317,17 @@ define <64 x i8> @test_masked_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:0.50] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_64xi8_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 @@ -5339,15 +5337,15 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] +; SKX-NEXT: retq %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer @@ -5356,17 +5354,17 @@ define <64 x i8> @test_masked_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:0.50] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_64xi8_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 @@ -5376,15 +5374,15 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] +; SKX-NEXT: retq %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer @@ -5393,30 +5391,30 @@ define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) { ; GENERIC-LABEL: test_64xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_64xi8_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] +; SKX-NEXT: retq %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> ret <64 x i8> %res } define <64 x i8> @test_masked_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:0.50] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_64xi8_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2 @@ -5426,15 +5424,15 @@ define <64 x i8> @test_masked_z_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_64xi8_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] +; SKX-NEXT: retq %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer @@ -5443,15 +5441,15 @@ define <64 x i8> @test_64xi8_perm_mem_mask0(<64 x i8>* %vp) { ; GENERIC-LABEL: test_64xi8_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [7:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_64xi8_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 +; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] +; SKX-NEXT: retq %vec = load <64 x i8>, <64 x i8>* %vp %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> ret <64 x i8> %res @@ -5459,17 +5457,17 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50] -; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 +; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] +; SKX-NEXT: retq %vec = load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer @@ -5480,17 +5478,17 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] -; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 +; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] +; SKX-NEXT: retq %vec = load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer @@ -5501,17 +5499,17 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50] -; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 +; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] +; SKX-NEXT: retq %vec = load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer @@ -5522,17 +5520,17 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] -; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 +; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] +; SKX-NEXT: retq %vec = load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer @@ -5543,17 +5541,17 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50] -; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 +; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] +; SKX-NEXT: retq %vec = load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer @@ -5564,17 +5562,17 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] -; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 +; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] +; SKX-NEXT: retq %vec = load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer @@ -5585,15 +5583,15 @@ define <64 x i8> @test_64xi8_perm_mem_mask3(<64 x i8>* %vp) { ; GENERIC-LABEL: test_64xi8_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [7:0.50] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_64xi8_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 +; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] +; SKX-NEXT: retq %vec = load <64 x i8>, <64 x i8>* %vp %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> ret <64 x i8> %res @@ -5601,17 +5599,17 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 +; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_64xi8_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50] -; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 +; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] +; SKX-NEXT: retq %vec = load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer @@ -5622,17 +5620,17 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %mask) { ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50] -; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 +; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] -; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 +; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 +; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] +; SKX-NEXT: retq %vec = load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %cmp = icmp eq <64 x i8> %mask, zeroinitializer @@ -5643,30 +5641,30 @@ define <8 x i16> @test_8xi16_perm_high_mask0(<8 x i16> %vec) { ; GENERIC-LABEL: test_8xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,7,6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi16_perm_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,7,6] +; SKX-NEXT: retq %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 @@ -5676,15 +5674,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -5693,17 +5691,17 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 @@ -5713,15 +5711,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -5730,17 +5728,17 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 @@ -5750,15 +5748,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -5767,30 +5765,30 @@ define <8 x i16> @test_8xi16_perm_low_mask3(<8 x i16> %vec) { ; GENERIC-LABEL: test_8xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,1,1,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi16_perm_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,1,1,4,5,6,7] +; SKX-NEXT: retq %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 @@ -5800,15 +5798,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -5817,17 +5815,17 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_high_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 @@ -5837,15 +5835,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -5854,17 +5852,17 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_low_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 @@ -5874,15 +5872,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -5891,30 +5889,30 @@ define <8 x i16> @test_8xi16_perm_high_mask6(<8 x i16> %vec) { ; GENERIC-LABEL: test_8xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi16_perm_high_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,5] +; SKX-NEXT: retq %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res } define <8 x i16> @test_masked_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_high_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 @@ -5924,15 +5922,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -5941,17 +5939,17 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_low_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2 @@ -5961,15 +5959,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] +; SKX-NEXT: retq %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -5978,13 +5976,13 @@ define <8 x i16> @test_8xi16_perm_high_mem_mask0(<8 x i16>* %vp) { ; GENERIC-LABEL: test_8xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi16_perm_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res @@ -5992,15 +5990,15 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6011,15 +6009,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6030,15 +6028,15 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6049,15 +6047,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6068,15 +6066,15 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6087,15 +6085,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6106,13 +6104,13 @@ define <8 x i16> @test_8xi16_perm_low_mem_mask3(<8 x i16>* %vp) { ; GENERIC-LABEL: test_8xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi16_perm_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res @@ -6120,15 +6118,15 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6139,15 +6137,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6158,15 +6156,15 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6177,15 +6175,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6196,15 +6194,15 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6215,15 +6213,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6234,13 +6232,13 @@ define <8 x i16> @test_8xi16_perm_high_mem_mask6(<8 x i16>* %vp) { ; GENERIC-LABEL: test_8xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi16_perm_high_mem_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res @@ -6248,15 +6246,15 @@ define <8 x i16> @test_masked_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6267,15 +6265,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6286,15 +6284,15 @@ define <8 x i16> @test_masked_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6305,15 +6303,15 @@ define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] +; SKX-NEXT: retq %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %cmp = icmp eq <8 x i16> %mask, zeroinitializer @@ -6324,30 +6322,30 @@ define <16 x i16> @test_16xi16_perm_high_mask0(<16 x i16> %vec) { ; GENERIC-LABEL: test_16xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi16_perm_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] +; SKX-NEXT: retq %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 @@ -6357,15 +6355,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -6374,17 +6372,17 @@ define <16 x i16> @test_masked_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 @@ -6394,15 +6392,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -6411,17 +6409,17 @@ define <16 x i16> @test_masked_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 @@ -6431,15 +6429,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -6448,30 +6446,30 @@ define <16 x i16> @test_16xi16_perm_low_mask3(<16 x i16> %vec) { ; GENERIC-LABEL: test_16xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi16_perm_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] +; SKX-NEXT: retq %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 @@ -6481,15 +6479,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -6498,17 +6496,17 @@ define <16 x i16> @test_masked_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_high_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 @@ -6518,15 +6516,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_high_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -6535,17 +6533,17 @@ define <16 x i16> @test_masked_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_low_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 @@ -6555,15 +6553,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_low_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -6572,30 +6570,30 @@ define <16 x i16> @test_16xi16_perm_high_mask6(<16 x i16> %vec) { ; GENERIC-LABEL: test_16xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi16_perm_high_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] +; SKX-NEXT: retq %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_high_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 @@ -6605,15 +6603,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_high_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -6622,17 +6620,17 @@ define <16 x i16> @test_masked_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_low_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2 @@ -6642,15 +6640,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_low_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] +; SKX-NEXT: retq %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -6659,13 +6657,13 @@ define <16 x i16> @test_16xi16_perm_high_mem_mask0(<16 x i16>* %vp) { ; GENERIC-LABEL: test_16xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi16_perm_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res @@ -6673,15 +6671,15 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -6692,15 +6690,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -6711,15 +6709,15 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -6730,15 +6728,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -6749,15 +6747,15 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -6768,15 +6766,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -6787,13 +6785,13 @@ define <16 x i16> @test_16xi16_perm_low_mem_mask3(<16 x i16>* %vp) { ; GENERIC-LABEL: test_16xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi16_perm_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res @@ -6801,15 +6799,15 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -6820,15 +6818,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -6839,15 +6837,15 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -6858,15 +6856,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -6877,15 +6875,15 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -6896,15 +6894,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -6915,13 +6913,13 @@ define <16 x i16> @test_16xi16_perm_high_mem_mask6(<16 x i16>* %vp) { ; GENERIC-LABEL: test_16xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi16_perm_high_mem_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res @@ -6929,15 +6927,15 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -6948,15 +6946,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -6967,15 +6965,15 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -6986,15 +6984,15 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] +; SKX-NEXT: retq %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %cmp = icmp eq <16 x i16> %mask, zeroinitializer @@ -7005,30 +7003,30 @@ define <32 x i16> @test_32xi16_perm_high_mask0(<32 x i16> %vec) { ; GENERIC-LABEL: test_32xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_32xi16_perm_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] +; SKX-NEXT: retq %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res } define <32 x i16> @test_masked_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 @@ -7038,15 +7036,15 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -7055,17 +7053,17 @@ define <32 x i16> @test_masked_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 @@ -7075,15 +7073,15 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -7092,17 +7090,17 @@ define <32 x i16> @test_masked_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 @@ -7112,15 +7110,15 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -7129,30 +7127,30 @@ define <32 x i16> @test_32xi16_perm_low_mask3(<32 x i16> %vec) { ; GENERIC-LABEL: test_32xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_32xi16_perm_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] +; SKX-NEXT: retq %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res } define <32 x i16> @test_masked_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 @@ -7162,15 +7160,15 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -7179,17 +7177,17 @@ define <32 x i16> @test_masked_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_high_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 @@ -7199,15 +7197,15 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -7216,17 +7214,17 @@ define <32 x i16> @test_masked_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_low_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 @@ -7236,15 +7234,15 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -7253,30 +7251,30 @@ define <32 x i16> @test_32xi16_perm_high_mask6(<32 x i16> %vec) { ; GENERIC-LABEL: test_32xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_32xi16_perm_high_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] +; SKX-NEXT: retq %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res } define <32 x i16> @test_masked_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_high_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 @@ -7286,15 +7284,15 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -7303,17 +7301,17 @@ define <32 x i16> @test_masked_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_low_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2 @@ -7323,15 +7321,15 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] +; SKX-NEXT: retq %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -7340,13 +7338,13 @@ define <32 x i16> @test_32xi16_perm_high_mem_mask0(<32 x i16>* %vp) { ; GENERIC-LABEL: test_32xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_32xi16_perm_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res @@ -7354,15 +7352,15 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7373,15 +7371,15 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7392,15 +7390,15 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7411,15 +7409,15 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7430,15 +7428,15 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7449,15 +7447,15 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7468,13 +7466,13 @@ define <32 x i16> @test_32xi16_perm_low_mem_mask3(<32 x i16>* %vp) { ; GENERIC-LABEL: test_32xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_32xi16_perm_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res @@ -7482,15 +7480,15 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7501,15 +7499,15 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7520,15 +7518,15 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7539,15 +7537,15 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask4: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask4: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7558,17 +7556,17 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00] -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmovdqu16 %zmm2, %zmm0 {%k1} # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vmovdqu16 %zmm2, %zmm0 {%k1} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00] -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovdqu16 %zmm2, %zmm0 {%k1} # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vmovdqu16 %zmm2, %zmm0 {%k1} +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7579,17 +7577,17 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00] -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask5: ; SKX: # %bb.0: -; SKX-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00] -; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] +; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; SKX-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z} +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7600,13 +7598,13 @@ define <32 x i16> @test_32xi16_perm_high_mem_mask6(<32 x i16>* %vp) { ; GENERIC-LABEL: test_32xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_32xi16_perm_high_mem_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res @@ -7614,15 +7612,15 @@ define <32 x i16> @test_masked_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7633,15 +7631,15 @@ define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask6: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7652,15 +7650,15 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7671,15 +7669,15 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %mask) { ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask7: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask7: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 +; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] +; SKX-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %cmp = icmp eq <32 x i16> %mask, zeroinitializer @@ -7690,30 +7688,30 @@ define <4 x i32> @test_4xi32_perm_mask0(<4 x i32> %vec) { ; GENERIC-LABEL: test_4xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi32_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0] +; SKX-NEXT: retq %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> ret <4 x i32> %res } define <4 x i32> @test_masked_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi32_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 @@ -7723,15 +7721,15 @@ define <4 x i32> @test_masked_z_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi32_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] +; SKX-NEXT: retq %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer @@ -7740,17 +7738,17 @@ define <4 x i32> @test_masked_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi32_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 @@ -7760,15 +7758,15 @@ define <4 x i32> @test_masked_z_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi32_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] +; SKX-NEXT: retq %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer @@ -7777,17 +7775,17 @@ define <4 x i32> @test_masked_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi32_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 @@ -7797,15 +7795,15 @@ define <4 x i32> @test_masked_z_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi32_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] +; SKX-NEXT: retq %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer @@ -7814,30 +7812,30 @@ define <4 x i32> @test_4xi32_perm_mask3(<4 x i32> %vec) { ; GENERIC-LABEL: test_4xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi32_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3] +; SKX-NEXT: retq %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> ret <4 x i32> %res } define <4 x i32> @test_masked_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi32_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:1.00] -; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] +; SKX-NEXT: vmovdqa %xmm1, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2 @@ -7847,15 +7845,15 @@ define <4 x i32> @test_masked_z_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi32_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] +; SKX-NEXT: retq %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer @@ -7864,13 +7862,13 @@ define <4 x i32> @test_4xi32_perm_mem_mask0(<4 x i32>* %vp) { ; GENERIC-LABEL: test_4xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,3,3] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,3,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi32_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,3,3] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,3,3] +; SKX-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> ret <4 x i32> %res @@ -7878,15 +7876,15 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi32_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] +; SKX-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -7897,15 +7895,15 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] +; SKX-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -7916,15 +7914,15 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi32_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] +; SKX-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -7935,15 +7933,15 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] +; SKX-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -7954,15 +7952,15 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi32_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] +; SKX-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -7973,15 +7971,15 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] +; SKX-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -7992,13 +7990,13 @@ define <4 x i32> @test_4xi32_perm_mem_mask3(<4 x i32>* %vp) { ; GENERIC-LABEL: test_4xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi32_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0] +; SKX-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> ret <4 x i32> %res @@ -8006,15 +8004,15 @@ define <4 x i32> @test_masked_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_4xi32_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] +; SKX-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -8025,15 +8023,15 @@ define <4 x i32> @test_masked_z_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %mask) { ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 +; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] +; SKX-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -8044,30 +8042,30 @@ define <8 x i32> @test2_8xi32_perm_mask0(<8 x i32> %vec) { ; GENERIC-LABEL: test2_8xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_8xi32_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4] +; SKX-NEXT: retq %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res } define <8 x i32> @test2_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_8xi32_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 @@ -8077,15 +8075,15 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_8xi32_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -8094,17 +8092,17 @@ define <8 x i32> @test2_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_8xi32_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 @@ -8114,15 +8112,15 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_8xi32_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -8131,17 +8129,17 @@ define <8 x i32> @test2_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_8xi32_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 @@ -8151,15 +8149,15 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_8xi32_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -8168,30 +8166,30 @@ define <8 x i32> @test2_8xi32_perm_mask3(<8 x i32> %vec) { ; GENERIC-LABEL: test2_8xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_8xi32_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4] +; SKX-NEXT: retq %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res } define <8 x i32> @test2_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_8xi32_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2 @@ -8201,15 +8199,15 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_8xi32_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -8218,13 +8216,13 @@ define <8 x i32> @test2_8xi32_perm_mem_mask0(<8 x i32>* %vp) { ; GENERIC-LABEL: test2_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_8xi32_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4] +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res @@ -8232,15 +8230,15 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -8251,15 +8249,15 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -8270,15 +8268,15 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -8289,15 +8287,15 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -8308,15 +8306,15 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -8327,15 +8325,15 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -8346,13 +8344,13 @@ define <8 x i32> @test2_8xi32_perm_mem_mask3(<8 x i32>* %vp) { ; GENERIC-LABEL: test2_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_8xi32_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4] +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res @@ -8360,15 +8358,15 @@ define <8 x i32> @test2_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_8xi32_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -8379,15 +8377,15 @@ define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 +; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] +; SKX-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -8398,30 +8396,30 @@ define <16 x i32> @test2_16xi32_perm_mask0(<16 x i32> %vec) { ; GENERIC-LABEL: test2_16xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_16xi32_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] +; SKX-NEXT: retq %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res } define <16 x i32> @test2_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_16xi32_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 @@ -8431,15 +8429,15 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -8448,17 +8446,17 @@ define <16 x i32> @test2_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_16xi32_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 @@ -8468,15 +8466,15 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -8485,17 +8483,17 @@ define <16 x i32> @test2_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_16xi32_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 @@ -8505,15 +8503,15 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -8522,30 +8520,30 @@ define <16 x i32> @test2_16xi32_perm_mask3(<16 x i32> %vec) { ; GENERIC-LABEL: test2_16xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_16xi32_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] +; SKX-NEXT: retq %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res } define <16 x i32> @test2_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_16xi32_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2 @@ -8555,15 +8553,15 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -8572,13 +8570,13 @@ define <16 x i32> @test2_16xi32_perm_mem_mask0(<16 x i32>* %vp) { ; GENERIC-LABEL: test2_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_16xi32_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res @@ -8586,15 +8584,15 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -8605,15 +8603,15 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -8624,15 +8622,15 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -8643,15 +8641,15 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -8662,15 +8660,15 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -8681,15 +8679,15 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -8700,13 +8698,13 @@ define <16 x i32> @test2_16xi32_perm_mem_mask3(<16 x i32>* %vp) { ; GENERIC-LABEL: test2_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_16xi32_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res @@ -8714,15 +8712,15 @@ define <16 x i32> @test2_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_16xi32_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -8733,15 +8731,15 @@ define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) { ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00] -; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 +; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] +; SKX-NEXT: retq %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -8752,30 +8750,30 @@ define <8 x float> @test2_8xfloat_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2) { ; GENERIC-LABEL: test2_8xfloat_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_8xfloat_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] +; SKX-NEXT: retq %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res } define <8 x float> @test2_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: vmovaps %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_8xfloat_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] -; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; SKX-NEXT: vmovaps %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 @@ -8785,15 +8783,15 @@ define <8 x float> @test2_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -8802,17 +8800,17 @@ define <8 x float> @test2_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: vmovaps %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_8xfloat_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] -; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; SKX-NEXT: vmovaps %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 @@ -8822,15 +8820,15 @@ define <8 x float> @test2_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -8839,17 +8837,17 @@ define <8 x float> @test2_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; GENERIC-NEXT: vmovaps %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_8xfloat_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] -; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; SKX-NEXT: vmovaps %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 @@ -8859,15 +8857,15 @@ define <8 x float> @test2_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -8876,30 +8874,30 @@ define <8 x float> @test2_8xfloat_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2) { ; GENERIC-LABEL: test2_8xfloat_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_8xfloat_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] +; SKX-NEXT: retq %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res } define <8 x float> @test2_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: vmovaps %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test2_8xfloat_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] -; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; SKX-NEXT: vmovaps %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 @@ -8909,15 +8907,15 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -8926,13 +8924,13 @@ define <8 x float> @test_8xfloat_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) { ; GENERIC-LABEL: test_8xfloat_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res @@ -8940,17 +8938,17 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -8961,15 +8959,15 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -8980,17 +8978,17 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -9001,15 +8999,15 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -9020,17 +9018,17 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -9041,15 +9039,15 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -9060,13 +9058,13 @@ define <8 x float> @test_8xfloat_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) { ; GENERIC-LABEL: test_8xfloat_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res @@ -9074,17 +9072,17 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -9095,15 +9093,15 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -9114,30 +9112,30 @@ define <16 x float> @test_16xfloat_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],zmm1[2,3,6,7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],zmm1[2,3,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],zmm1[2,3,6,7] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],zmm1[2,3,6,7] +; SKX-NEXT: retq %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res } define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] +; GENERIC-NEXT: vmovaps %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] -; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] +; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 @@ -9147,15 +9145,15 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -9164,17 +9162,17 @@ define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] +; GENERIC-NEXT: vmovaps %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [3:1.00] -; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] +; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 @@ -9184,15 +9182,15 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -9201,17 +9199,17 @@ define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] +; GENERIC-NEXT: vmovaps %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [3:1.00] -; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] +; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 @@ -9221,15 +9219,15 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -9238,30 +9236,30 @@ define <16 x float> @test_16xfloat_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2) { ; GENERIC-LABEL: test_16xfloat_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7],zmm1[0,1,4,5] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7],zmm1[0,1,4,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7],zmm1[0,1,4,5] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7],zmm1[0,1,4,5] +; SKX-NEXT: retq %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res } define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] +; GENERIC-NEXT: vmovaps %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [3:1.00] -; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] +; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 @@ -9271,15 +9269,15 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -9288,13 +9286,13 @@ define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5],mem[4,5,2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5],mem[4,5,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5],mem[4,5,2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5],mem[4,5,2,3] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res @@ -9302,17 +9300,17 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -9323,15 +9321,15 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -9342,17 +9340,17 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -9363,15 +9361,15 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -9382,17 +9380,17 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [10:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -9403,15 +9401,15 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -9422,13 +9420,13 @@ define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,6,7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,6,7] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,6,7] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res @@ -9436,17 +9434,17 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -9457,15 +9455,15 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -9476,30 +9474,30 @@ define <4 x double> @test_4xdouble_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2) { ; GENERIC-LABEL: test_4xdouble_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] +; SKX-NEXT: retq %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res } define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: vmovapd %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] +; SKX-NEXT: vmovapd %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 @@ -9509,15 +9507,15 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -9526,17 +9524,17 @@ define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: vmovapd %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] +; SKX-NEXT: vmovapd %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 @@ -9546,15 +9544,15 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -9563,17 +9561,17 @@ define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] +; GENERIC-NEXT: vmovapd %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] -; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] +; SKX-NEXT: vmovapd %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 @@ -9583,15 +9581,15 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -9600,30 +9598,30 @@ define <4 x double> @test_4xdouble_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2) { ; GENERIC-LABEL: test_4xdouble_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; SKX-NEXT: retq %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res } define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] +; GENERIC-NEXT: vmovapd %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] -; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] +; SKX-NEXT: vmovapd %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 @@ -9633,15 +9631,15 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -9650,13 +9648,13 @@ define <4 x double> @test_4xdouble_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) { ; GENERIC-LABEL: test_4xdouble_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res @@ -9664,17 +9662,17 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [10:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -9685,15 +9683,15 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -9704,17 +9702,17 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -9725,15 +9723,15 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -9744,17 +9742,17 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -9765,15 +9763,15 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -9784,13 +9782,13 @@ define <4 x double> @test_4xdouble_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) { ; GENERIC-LABEL: test_4xdouble_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res @@ -9798,17 +9796,17 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [10:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -9819,15 +9817,15 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -9838,30 +9836,30 @@ define <8 x double> @test_8xdouble_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2) { ; GENERIC-LABEL: test_8xdouble_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1] +; SKX-NEXT: retq %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res } define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] +; GENERIC-NEXT: vmovapd %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [3:1.00] -; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] +; SKX-NEXT: vmovapd %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 @@ -9871,15 +9869,15 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -9888,17 +9886,17 @@ define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] +; GENERIC-NEXT: vmovapd %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [3:1.00] -; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] +; SKX-NEXT: vmovapd %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 @@ -9908,15 +9906,15 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -9925,17 +9923,17 @@ define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] +; GENERIC-NEXT: vmovapd %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [3:1.00] -; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] +; SKX-NEXT: vmovapd %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 @@ -9945,15 +9943,15 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -9962,30 +9960,30 @@ define <8 x double> @test_8xdouble_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2) { ; GENERIC-LABEL: test_8xdouble_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3] +; SKX-NEXT: retq %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res } define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] +; GENERIC-NEXT: vmovapd %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [3:1.00] -; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] +; SKX-NEXT: vmovapd %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 @@ -9995,15 +9993,15 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -10012,13 +10010,13 @@ define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res @@ -10026,17 +10024,17 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [10:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -10047,15 +10045,15 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -10066,17 +10064,17 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [10:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -10087,15 +10085,15 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -10106,17 +10104,17 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [10:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -10127,15 +10125,15 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -10146,13 +10144,13 @@ define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res @@ -10160,17 +10158,17 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [10:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -10181,15 +10179,15 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -10200,30 +10198,30 @@ define <8 x i32> @test_8xi32_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2) { ; GENERIC-LABEL: test_8xi32_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; SKX-NEXT: retq %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> ret <8 x i32> %res } define <8 x i32> @test_8xi32_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; SKX-NEXT: vmovdqa %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 @@ -10233,15 +10231,15 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -10250,17 +10248,17 @@ define <8 x i32> @test_8xi32_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; SKX-NEXT: vmovdqa %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 @@ -10270,15 +10268,15 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -10287,17 +10285,17 @@ define <8 x i32> @test_8xi32_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; SKX-NEXT: vmovdqa %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 @@ -10307,15 +10305,15 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -10324,30 +10322,30 @@ define <8 x i32> @test_8xi32_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2) { ; GENERIC-LABEL: test_8xi32_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] +; SKX-NEXT: retq %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> ret <8 x i32> %res } define <8 x i32> @test_8xi32_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; SKX-NEXT: vmovdqa %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3 @@ -10357,15 +10355,15 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] +; SKX-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -10374,13 +10372,13 @@ define <8 x i32> @test_8xi32_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) { ; GENERIC-LABEL: test_8xi32_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] +; SKX-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> ret <8 x i32> %res @@ -10388,17 +10386,17 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -10409,15 +10407,15 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] +; SKX-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -10428,17 +10426,17 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -10449,15 +10447,15 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] +; SKX-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -10468,17 +10466,17 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -10489,15 +10487,15 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] +; SKX-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -10508,13 +10506,13 @@ define <8 x i32> @test_8xi32_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) { ; GENERIC-LABEL: test_8xi32_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] +; SKX-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> ret <8 x i32> %res @@ -10522,17 +10520,17 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -10543,15 +10541,15 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] +; SKX-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -10562,30 +10560,30 @@ define <16 x i32> @test_16xi32_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2) { ; GENERIC-LABEL: test_16xi32_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],zmm1[2,3,6,7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],zmm1[2,3,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],zmm1[2,3,6,7] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],zmm1[2,3,6,7] +; SKX-NEXT: retq %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> ret <16 x i32> %res } define <16 x i32> @test_16xi32_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] +; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] +; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 @@ -10595,15 +10593,15 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -10612,17 +10610,17 @@ define <16 x i32> @test_16xi32_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] +; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] +; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 @@ -10632,15 +10630,15 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -10649,17 +10647,17 @@ define <16 x i32> @test_16xi32_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] +; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] +; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 @@ -10669,15 +10667,15 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -10686,30 +10684,30 @@ define <16 x i32> @test_16xi32_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2) { ; GENERIC-LABEL: test_16xi32_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm1[4,5,2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm1[4,5,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm1[4,5,2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm1[4,5,2,3] +; SKX-NEXT: retq %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> ret <16 x i32> %res } define <16 x i32> @test_16xi32_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] +; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] +; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3 @@ -10719,15 +10717,15 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] +; SKX-NEXT: retq %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -10736,13 +10734,13 @@ define <16 x i32> @test_16xi32_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p) { ; GENERIC-LABEL: test_16xi32_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],mem[4,5,0,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],mem[4,5,0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],mem[4,5,0,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],mem[4,5,0,1] +; SKX-NEXT: retq %vec2 = load <16 x i32>, <16 x i32>* %vec2p %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> ret <16 x i32> %res @@ -10750,17 +10748,17 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [10:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -10771,15 +10769,15 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] +; SKX-NEXT: retq %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -10790,17 +10788,17 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [10:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -10811,15 +10809,15 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] +; SKX-NEXT: retq %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -10830,17 +10828,17 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -10851,15 +10849,15 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] +; SKX-NEXT: retq %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -10870,13 +10868,13 @@ define <16 x i32> @test_16xi32_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p) { ; GENERIC-LABEL: test_16xi32_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[2,3,6,7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[2,3,6,7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[2,3,6,7] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[2,3,6,7] +; SKX-NEXT: retq %vec2 = load <16 x i32>, <16 x i32>* %vec2p %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> ret <16 x i32> %res @@ -10884,17 +10882,17 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [10:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -10905,15 +10903,15 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] +; SKX-NEXT: retq %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -10924,30 +10922,30 @@ define <4 x i64> @test_4xi64_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2) { ; GENERIC-LABEL: test_4xi64_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] +; SKX-NEXT: retq %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> ret <4 x i64> %res } define <4 x i64> @test_4xi64_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] +; SKX-NEXT: vmovdqa %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 @@ -10957,15 +10955,15 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -10974,17 +10972,17 @@ define <4 x i64> @test_4xi64_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] +; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] +; SKX-NEXT: vmovdqa %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 @@ -10994,15 +10992,15 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -11011,17 +11009,17 @@ define <4 x i64> @test_4xi64_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] +; SKX-NEXT: vmovdqa %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 @@ -11031,15 +11029,15 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -11048,30 +11046,30 @@ define <4 x i64> @test_4xi64_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2) { ; GENERIC-LABEL: test_4xi64_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; SKX-NEXT: retq %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> ret <4 x i64> %res } define <4 x i64> @test_4xi64_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] +; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] -; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] +; SKX-NEXT: vmovdqa %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3 @@ -11081,15 +11079,15 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] +; SKX-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -11098,13 +11096,13 @@ define <4 x i64> @test_4xi64_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) { ; GENERIC-LABEL: test_4xi64_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] +; SKX-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> ret <4 x i64> %res @@ -11112,17 +11110,17 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [10:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -11133,15 +11131,15 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] +; SKX-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -11152,17 +11150,17 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -11173,15 +11171,15 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] +; SKX-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -11192,17 +11190,17 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -11213,15 +11211,15 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] +; SKX-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -11232,13 +11230,13 @@ define <4 x i64> @test_4xi64_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) { ; GENERIC-LABEL: test_4xi64_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] +; SKX-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> ret <4 x i64> %res @@ -11246,17 +11244,17 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [10:1.00] -; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] +; SKX-NEXT: vmovdqa %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -11267,15 +11265,15 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] +; SKX-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -11286,30 +11284,30 @@ define <8 x i64> @test_8xi64_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2) { ; GENERIC-LABEL: test_8xi64_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5] +; SKX-NEXT: retq %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> ret <8 x i64> %res } define <8 x i64> @test_8xi64_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] +; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] +; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 @@ -11319,15 +11317,15 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -11336,17 +11334,17 @@ define <8 x i64> @test_8xi64_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] +; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] +; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 @@ -11356,15 +11354,15 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -11373,17 +11371,17 @@ define <8 x i64> @test_8xi64_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] +; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] +; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 @@ -11393,15 +11391,15 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -11410,30 +11408,30 @@ define <8 x i64> @test_8xi64_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2) { ; GENERIC-LABEL: test_8xi64_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3] +; SKX-NEXT: retq %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> ret <8 x i64> %res } define <8 x i64> @test_8xi64_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] +; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [3:1.00] -; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] +; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3 @@ -11443,15 +11441,15 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] +; SKX-NEXT: retq %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -11460,13 +11458,13 @@ define <8 x i64> @test_8xi64_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p) { ; GENERIC-LABEL: test_8xi64_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] +; SKX-NEXT: retq %vec2 = load <8 x i64>, <8 x i64>* %vec2p %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> ret <8 x i64> %res @@ -11474,17 +11472,17 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [10:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -11495,15 +11493,15 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] +; SKX-NEXT: retq %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -11514,17 +11512,17 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [10:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -11535,15 +11533,15 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] +; SKX-NEXT: retq %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -11554,17 +11552,17 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [10:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -11575,15 +11573,15 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] +; SKX-NEXT: retq %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -11594,13 +11592,13 @@ define <8 x i64> @test_8xi64_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p) { ; GENERIC-LABEL: test_8xi64_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] +; SKX-NEXT: retq %vec2 = load <8 x i64>, <8 x i64>* %vec2p %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> ret <8 x i64> %res @@ -11608,17 +11606,17 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00] -; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] +; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [10:1.00] -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] +; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -11629,15 +11627,15 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [10:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] +; SKX-NEXT: retq %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -11648,30 +11646,30 @@ define <4 x float> @test_4xfloat_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2) { ; GENERIC-LABEL: test_4xfloat_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SKX-NEXT: retq %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res } define <4 x float> @test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; GENERIC-NEXT: vmovaps %xmm2, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SKX-NEXT: vmovaps %xmm2, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 @@ -11681,15 +11679,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer @@ -11698,17 +11696,17 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; GENERIC-NEXT: vmovaps %xmm2, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SKX-NEXT: vmovaps %xmm2, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 @@ -11718,15 +11716,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer @@ -11735,17 +11733,17 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; GENERIC-NEXT: vmovaps %xmm2, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_low_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SKX-NEXT: vmovaps %xmm2, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 @@ -11755,15 +11753,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer @@ -11772,30 +11770,30 @@ define <4 x float> @test_4xfloat_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2) { ; GENERIC-LABEL: test_4xfloat_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_unpack_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SKX-NEXT: retq %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res } define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; GENERIC-NEXT: vmovaps %xmm2, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SKX-NEXT: vmovaps %xmm2, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 @@ -11805,15 +11803,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer @@ -11822,13 +11820,13 @@ define <4 x float> @test_4xfloat_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) { ; GENERIC-LABEL: test_4xfloat_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res @@ -11836,17 +11834,17 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] +; GENERIC-NEXT: vmovaps %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] +; SKX-NEXT: vmovaps %xmm1, %xmm0 +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -11857,15 +11855,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -11876,17 +11874,17 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] +; GENERIC-NEXT: vmovaps %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] +; SKX-NEXT: vmovaps %xmm1, %xmm0 +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -11897,15 +11895,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -11916,17 +11914,17 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] +; GENERIC-NEXT: vmovaps %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_low_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] +; SKX-NEXT: vmovaps %xmm1, %xmm0 +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -11937,15 +11935,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -11956,13 +11954,13 @@ define <4 x float> @test_4xfloat_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) { ; GENERIC-LABEL: test_4xfloat_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_unpack_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res @@ -11970,17 +11968,17 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] +; GENERIC-NEXT: vmovaps %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] +; SKX-NEXT: vmovaps %xmm1, %xmm0 +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -11991,15 +11989,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -12010,30 +12008,30 @@ define <8 x float> @test_8xfloat_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2) { ; GENERIC-LABEL: test_8xfloat_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; SKX-NEXT: retq %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res } define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; GENERIC-NEXT: vmovaps %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; SKX-NEXT: vmovaps %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 @@ -12043,15 +12041,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -12060,17 +12058,17 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; GENERIC-NEXT: vmovaps %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; SKX-NEXT: vmovaps %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 @@ -12080,15 +12078,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -12097,17 +12095,17 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; GENERIC-NEXT: vmovaps %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_low_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; SKX-NEXT: vmovaps %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 @@ -12117,15 +12115,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -12134,30 +12132,30 @@ define <8 x float> @test_8xfloat_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2) { ; GENERIC-LABEL: test_8xfloat_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_unpack_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; SKX-NEXT: retq %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res } define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; GENERIC-NEXT: vmovaps %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; SKX-NEXT: vmovaps %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 @@ -12167,15 +12165,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -12184,13 +12182,13 @@ define <8 x float> @test_8xfloat_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) { ; GENERIC-LABEL: test_8xfloat_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res @@ -12198,17 +12196,17 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -12219,15 +12217,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -12238,17 +12236,17 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -12259,15 +12257,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -12278,17 +12276,17 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_low_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -12299,15 +12297,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -12318,13 +12316,13 @@ define <8 x float> @test_8xfloat_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) { ; GENERIC-LABEL: test_8xfloat_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_unpack_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res @@ -12332,17 +12330,17 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -12353,15 +12351,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -12372,30 +12370,30 @@ define <16 x float> @test_16xfloat_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2) { ; GENERIC-LABEL: test_16xfloat_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; SKX-NEXT: retq %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res } define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; GENERIC-NEXT: vmovaps %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 @@ -12405,15 +12403,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -12422,17 +12420,17 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; GENERIC-NEXT: vmovaps %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 @@ -12442,15 +12440,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -12459,17 +12457,17 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; GENERIC-NEXT: vmovaps %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_low_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 @@ -12479,15 +12477,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -12496,30 +12494,30 @@ define <16 x float> @test_16xfloat_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2) { ; GENERIC-LABEL: test_16xfloat_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_unpack_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; SKX-NEXT: retq %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res } define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; GENERIC-NEXT: vmovaps %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 @@ -12529,15 +12527,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -12546,13 +12544,13 @@ define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res @@ -12560,17 +12558,17 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -12581,15 +12579,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -12600,17 +12598,17 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -12621,15 +12619,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -12640,17 +12638,17 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -12661,15 +12659,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -12680,13 +12678,13 @@ define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_unpack_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res @@ -12694,17 +12692,17 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -12715,15 +12713,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -12734,30 +12732,30 @@ define <2 x double> @test_2xdouble_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2) { ; GENERIC-LABEL: test_2xdouble_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SKX-NEXT: retq %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> ret <2 x double> %res } define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] +; GENERIC-NEXT: vmovapd %xmm2, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_masked_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] +; SKX-NEXT: vmovapd %xmm2, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 @@ -12767,15 +12765,15 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] +; SKX-NEXT: retq %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer @@ -12784,17 +12782,17 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] +; GENERIC-NEXT: vmovapd %xmm2, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_masked_unpack_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] +; SKX-NEXT: vmovapd %xmm2, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 @@ -12804,15 +12802,15 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] +; SKX-NEXT: retq %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer @@ -12821,13 +12819,13 @@ define <2 x double> @test_2xdouble_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) { ; GENERIC-LABEL: test_2xdouble_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; SKX-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> ret <2 x double> %res @@ -12835,17 +12833,17 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] -; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] +; GENERIC-NEXT: vmovapd %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_masked_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] -; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] +; SKX-NEXT: vmovapd %xmm1, %xmm0 +; SKX-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer @@ -12856,15 +12854,15 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] +; SKX-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer @@ -12875,17 +12873,17 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] -; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] +; GENERIC-NEXT: vmovapd %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_masked_unpack_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] -; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] +; SKX-NEXT: vmovapd %xmm1, %xmm0 +; SKX-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer @@ -12896,15 +12894,15 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] +; SKX-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer @@ -12915,30 +12913,30 @@ define <4 x double> @test_4xdouble_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2) { ; GENERIC-LABEL: test_4xdouble_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; SKX-NEXT: retq %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res } define <4 x double> @test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; GENERIC-NEXT: vmovapd %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; SKX-NEXT: vmovapd %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 @@ -12948,15 +12946,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -12965,17 +12963,17 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; GENERIC-NEXT: vmovapd %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; SKX-NEXT: vmovapd %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 @@ -12985,15 +12983,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -13002,17 +13000,17 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; GENERIC-NEXT: vmovapd %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_low_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; SKX-NEXT: vmovapd %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 @@ -13022,15 +13020,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -13039,30 +13037,30 @@ define <4 x double> @test_4xdouble_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2) { ; GENERIC-LABEL: test_4xdouble_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_unpack_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; SKX-NEXT: retq %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res } define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; GENERIC-NEXT: vmovapd %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; SKX-NEXT: vmovapd %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 @@ -13072,15 +13070,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -13089,13 +13087,13 @@ define <4 x double> @test_4xdouble_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) { ; GENERIC-LABEL: test_4xdouble_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res @@ -13103,17 +13101,17 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -13124,15 +13122,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -13143,17 +13141,17 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -13164,15 +13162,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -13183,17 +13181,17 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -13204,15 +13202,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -13223,13 +13221,13 @@ define <4 x double> @test_4xdouble_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) { ; GENERIC-LABEL: test_4xdouble_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_unpack_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res @@ -13237,17 +13235,17 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -13258,15 +13256,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -13277,30 +13275,30 @@ define <8 x double> @test_8xdouble_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2) { ; GENERIC-LABEL: test_8xdouble_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; SKX-NEXT: retq %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res } define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; GENERIC-NEXT: vmovapd %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; SKX-NEXT: vmovapd %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 @@ -13310,15 +13308,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -13327,17 +13325,17 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; GENERIC-NEXT: vmovapd %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; SKX-NEXT: vmovapd %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 @@ -13347,15 +13345,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -13364,17 +13362,17 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; GENERIC-NEXT: vmovapd %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_low_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; SKX-NEXT: vmovapd %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 @@ -13384,15 +13382,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -13401,30 +13399,30 @@ define <8 x double> @test_8xdouble_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2) { ; GENERIC-LABEL: test_8xdouble_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_unpack_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; SKX-NEXT: retq %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res } define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; GENERIC-NEXT: vmovapd %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; SKX-NEXT: vmovapd %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 @@ -13434,15 +13432,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -13451,13 +13449,13 @@ define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res @@ -13465,17 +13463,17 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -13486,15 +13484,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -13505,17 +13503,17 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -13526,15 +13524,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -13545,17 +13543,17 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -13566,15 +13564,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -13585,13 +13583,13 @@ define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_unpack_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res @@ -13599,17 +13597,17 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -13620,15 +13618,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -13639,30 +13637,30 @@ define <4 x float> @test_4xfloat_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2) { ; GENERIC-LABEL: test_4xfloat_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SKX-NEXT: retq %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res } define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; GENERIC-NEXT: vmovaps %xmm2, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SKX-NEXT: vmovaps %xmm2, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 @@ -13672,15 +13670,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer @@ -13689,17 +13687,17 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; GENERIC-NEXT: vmovaps %xmm2, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_high_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SKX-NEXT: vmovaps %xmm2, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 @@ -13709,15 +13707,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer @@ -13726,17 +13724,17 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; GENERIC-NEXT: vmovaps %xmm2, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SKX-NEXT: vmovaps %xmm2, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 @@ -13746,15 +13744,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer @@ -13763,30 +13761,30 @@ define <4 x float> @test_4xfloat_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2) { ; GENERIC-LABEL: test_4xfloat_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_unpack_high_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SKX-NEXT: retq %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res } define <4 x float> @test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; GENERIC-NEXT: vmovaps %xmm2, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_high_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SKX-NEXT: vmovaps %xmm2, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3 @@ -13796,15 +13794,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SKX-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer @@ -13813,13 +13811,13 @@ define <4 x float> @test_4xfloat_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) { ; GENERIC-LABEL: test_4xfloat_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res @@ -13827,17 +13825,17 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] +; GENERIC-NEXT: vmovaps %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] +; SKX-NEXT: vmovaps %xmm1, %xmm0 +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -13848,15 +13846,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -13867,17 +13865,17 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] +; GENERIC-NEXT: vmovaps %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] +; SKX-NEXT: vmovaps %xmm1, %xmm0 +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -13888,15 +13886,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -13907,17 +13905,17 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] +; GENERIC-NEXT: vmovaps %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] +; SKX-NEXT: vmovaps %xmm1, %xmm0 +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -13928,15 +13926,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -13947,13 +13945,13 @@ define <4 x float> @test_4xfloat_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) { ; GENERIC-LABEL: test_4xfloat_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_unpack_high_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res @@ -13961,17 +13959,17 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] +; GENERIC-NEXT: vmovaps %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] +; SKX-NEXT: vmovaps %xmm1, %xmm0 +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -13982,15 +13980,15 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) { ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] +; SKX-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %cmp = icmp eq <4 x i32> %mask, zeroinitializer @@ -14001,30 +13999,30 @@ define <8 x float> @test_8xfloat_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2) { ; GENERIC-LABEL: test_8xfloat_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; SKX-NEXT: retq %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res } define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; GENERIC-NEXT: vmovaps %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; SKX-NEXT: vmovaps %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 @@ -14034,15 +14032,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -14051,17 +14049,17 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; GENERIC-NEXT: vmovaps %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_high_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; SKX-NEXT: vmovaps %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 @@ -14071,15 +14069,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -14088,17 +14086,17 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; GENERIC-NEXT: vmovaps %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; SKX-NEXT: vmovaps %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 @@ -14108,15 +14106,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -14125,30 +14123,30 @@ define <8 x float> @test_8xfloat_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2) { ; GENERIC-LABEL: test_8xfloat_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_unpack_high_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; SKX-NEXT: retq %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res } define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; GENERIC-NEXT: vmovaps %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_high_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; SKX-NEXT: vmovaps %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3 @@ -14158,15 +14156,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; SKX-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer @@ -14175,13 +14173,13 @@ define <8 x float> @test_8xfloat_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) { ; GENERIC-LABEL: test_8xfloat_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res @@ -14189,17 +14187,17 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -14210,15 +14208,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -14229,17 +14227,17 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_high_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -14250,15 +14248,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -14269,17 +14267,17 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -14290,15 +14288,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -14309,13 +14307,13 @@ define <8 x float> @test_8xfloat_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) { ; GENERIC-LABEL: test_8xfloat_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_unpack_high_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res @@ -14323,17 +14321,17 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; GENERIC-NEXT: vmovaps %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_masked_unpack_high_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; SKX-NEXT: vmovaps %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -14344,15 +14342,15 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) { ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; SKX-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %cmp = icmp eq <8 x i32> %mask, zeroinitializer @@ -14363,30 +14361,30 @@ define <16 x float> @test_16xfloat_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2) { ; GENERIC-LABEL: test_16xfloat_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; SKX-NEXT: retq %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res } define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; GENERIC-NEXT: vmovaps %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 @@ -14396,15 +14394,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -14413,17 +14411,17 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; GENERIC-NEXT: vmovaps %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_high_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 @@ -14433,15 +14431,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -14450,17 +14448,17 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; GENERIC-NEXT: vmovaps %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 @@ -14470,15 +14468,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -14487,30 +14485,30 @@ define <16 x float> @test_16xfloat_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2) { ; GENERIC-LABEL: test_16xfloat_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_unpack_high_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; SKX-NEXT: retq %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res } define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; GENERIC-NEXT: vmovaps %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_high_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; SKX-NEXT: vmovaps %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3 @@ -14520,15 +14518,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; SKX-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer @@ -14537,13 +14535,13 @@ define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res @@ -14551,17 +14549,17 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -14572,15 +14570,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -14591,17 +14589,17 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -14612,15 +14610,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -14631,17 +14629,17 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -14652,15 +14650,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -14671,13 +14669,13 @@ define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { ; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_unpack_high_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res @@ -14685,17 +14683,17 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; GENERIC-NEXT: vmovaps %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; SKX-NEXT: vmovaps %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -14706,15 +14704,15 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) { ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] +; SKX-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %cmp = icmp eq <16 x i32> %mask, zeroinitializer @@ -14725,30 +14723,30 @@ define <2 x double> @test_2xdouble_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2) { ; GENERIC-LABEL: test_2xdouble_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; SKX-NEXT: retq %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> ret <2 x double> %res } define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] +; GENERIC-NEXT: vmovapd %xmm2, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_masked_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] +; SKX-NEXT: vmovapd %xmm2, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 @@ -14758,15 +14756,15 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] +; SKX-NEXT: retq %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer @@ -14775,17 +14773,17 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] +; GENERIC-NEXT: vmovapd %xmm2, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_masked_unpack_high_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] +; SKX-NEXT: vmovapd %xmm2, %xmm0 +; SKX-NEXT: retq %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3 @@ -14795,15 +14793,15 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] +; SKX-NEXT: retq %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer @@ -14812,13 +14810,13 @@ define <2 x double> @test_2xdouble_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) { ; GENERIC-LABEL: test_2xdouble_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1] +; SKX-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> ret <2 x double> %res @@ -14826,17 +14824,17 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] +; GENERIC-NEXT: vmovapd %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_masked_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] +; SKX-NEXT: vmovapd %xmm1, %xmm0 +; SKX-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer @@ -14847,15 +14845,15 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] +; SKX-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer @@ -14866,17 +14864,17 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] +; GENERIC-NEXT: vmovapd %xmm1, %xmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_masked_unpack_high_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] +; SKX-NEXT: vmovapd %xmm1, %xmm0 +; SKX-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer @@ -14887,15 +14885,15 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) { ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] +; SKX-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %cmp = icmp eq <2 x i64> %mask, zeroinitializer @@ -14906,30 +14904,30 @@ define <4 x double> @test_4xdouble_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2) { ; GENERIC-LABEL: test_4xdouble_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; SKX-NEXT: retq %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res } define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; GENERIC-NEXT: vmovapd %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; SKX-NEXT: vmovapd %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 @@ -14939,15 +14937,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -14956,17 +14954,17 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; GENERIC-NEXT: vmovapd %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_high_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; SKX-NEXT: vmovapd %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 @@ -14976,15 +14974,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -14993,17 +14991,17 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; GENERIC-NEXT: vmovapd %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; SKX-NEXT: vmovapd %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 @@ -15013,15 +15011,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -15030,30 +15028,30 @@ define <4 x double> @test_4xdouble_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2) { ; GENERIC-LABEL: test_4xdouble_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_unpack_high_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; SKX-NEXT: retq %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res } define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; GENERIC-NEXT: vmovapd %ymm2, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_high_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; SKX-NEXT: vmovapd %ymm2, %ymm0 +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3 @@ -15063,15 +15061,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; SKX-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer @@ -15080,13 +15078,13 @@ define <4 x double> @test_4xdouble_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) { ; GENERIC-LABEL: test_4xdouble_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res @@ -15094,17 +15092,17 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -15115,15 +15113,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -15134,17 +15132,17 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -15155,15 +15153,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -15174,17 +15172,17 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -15195,15 +15193,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -15214,13 +15212,13 @@ define <4 x double> @test_4xdouble_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) { ; GENERIC-LABEL: test_4xdouble_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_unpack_high_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res @@ -15228,17 +15226,17 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] +; GENERIC-NEXT: vmovapd %ymm1, %ymm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] +; SKX-NEXT: vmovapd %ymm1, %ymm0 +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -15249,15 +15247,15 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) { ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] +; SKX-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %cmp = icmp eq <4 x i64> %mask, zeroinitializer @@ -15268,30 +15266,30 @@ define <8 x double> @test_8xdouble_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2) { ; GENERIC-LABEL: test_8xdouble_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; SKX-NEXT: retq %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res } define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; GENERIC-NEXT: vmovapd %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; SKX-NEXT: vmovapd %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 @@ -15301,15 +15299,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -15318,17 +15316,17 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; GENERIC-NEXT: vmovapd %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_high_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; SKX-NEXT: vmovapd %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 @@ -15338,15 +15336,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -15355,17 +15353,17 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; GENERIC-NEXT: vmovapd %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; SKX-NEXT: vmovapd %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 @@ -15375,15 +15373,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -15392,30 +15390,30 @@ define <8 x double> @test_8xdouble_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2) { ; GENERIC-LABEL: test_8xdouble_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_unpack_high_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; SKX-NEXT: retq %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res } define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; GENERIC-NEXT: vmovapd %zmm2, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_high_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; SKX-NEXT: vmovapd %zmm2, %zmm0 +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3 @@ -15425,15 +15423,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; SKX-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer @@ -15442,13 +15440,13 @@ define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res @@ -15456,17 +15454,17 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -15477,15 +15475,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -15496,17 +15494,17 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -15517,15 +15515,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -15536,17 +15534,17 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -15557,15 +15555,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -15576,13 +15574,13 @@ define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { ; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_unpack_high_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res @@ -15590,17 +15588,17 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; GENERIC-NEXT: vmovapd %zmm1, %zmm0 +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; SKX-NEXT: vmovapd %zmm1, %zmm0 +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer @@ -15611,15 +15609,15 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) { ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33] -; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; GENERIC-NEXT: retq ; ; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3: ; SKX: # %bb.0: -; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 +; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; SKX-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %cmp = icmp eq <8 x i64> %mask, zeroinitializer Index: test/CodeGen/X86/avx512vpopcntdq-schedule.ll =================================================================== --- test/CodeGen/X86/avx512vpopcntdq-schedule.ll +++ test/CodeGen/X86/avx512vpopcntdq-schedule.ll @@ -1,42 +1,42 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=icelake-client | FileCheck %s --check-prefix=ICELAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=icelake-server | FileCheck %s --check-prefix=ICELAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-client | FileCheck %s --check-prefix=ICELAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-server | FileCheck %s --check-prefix=ICELAKE define void @test_vpopcntd(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> *%a2, i16 %a3) { ; GENERIC-LABEL: test_vpopcntd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovw %esi, %k1 # sched: [1:0.33] +; GENERIC-NEXT: kmovw %esi, %k1 ; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:0.50] -; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] -; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:0.50] -; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:0.50] +; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 +; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 +; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} +; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 +; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} +; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; ICELAKE-LABEL: test_vpopcntd: ; ICELAKE: # %bb.0: -; ICELAKE-NEXT: kmovd %esi, %k1 # sched: [1:1.00] +; ICELAKE-NEXT: kmovd %esi, %k1 ; ICELAKE-NEXT: #APP -; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:1.00] -; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:1.00] -; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] -; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:1.00] -; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:1.00] -; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] -; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:1.00] -; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:1.00] -; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 +; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} +; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} +; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 +; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} +; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} +; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 +; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} +; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} ; ICELAKE-NEXT: #NO_APP -; ICELAKE-NEXT: vzeroupper # sched: [0:0.67] -; ICELAKE-NEXT: retq # sched: [7:1.00] +; ICELAKE-NEXT: vzeroupper +; ICELAKE-NEXT: retq tail call void asm "vpopcntd $1, $0 \0A\09 vpopcntd $1, $0 {$3} \0A\09 vpopcntd $1, $0 {$3} {z} \0A\09 vpopcntd $2, $0 \0A\09 vpopcntd $2, $0 {$3} \0A\09 vpopcntd $2, $0 {$3} {z} \0A\09 vpopcntd $2{1to16}, $0 \0A\09 vpopcntd $2{1to16}, $0 {$3} \0A\09 vpopcntd $2{1to16}, $0 {$3} {z}", "v,v,*m,^Yk"(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> *%a2, i16 %a3) nounwind ret void } @@ -44,37 +44,37 @@ define void @test_vpopcntq(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> *%a2, i8 %a3) { ; GENERIC-LABEL: test_vpopcntq: ; GENERIC: # %bb.0: -; GENERIC-NEXT: kmovw %esi, %k1 # sched: [1:0.33] +; GENERIC-NEXT: kmovw %esi, %k1 ; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:0.50] -; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] -; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:0.50] -; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:0.50] -; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:0.50] +; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 +; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} +; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} +; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 +; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} +; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 +; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} +; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} ; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq ; ; ICELAKE-LABEL: test_vpopcntq: ; ICELAKE: # %bb.0: -; ICELAKE-NEXT: kmovd %esi, %k1 # sched: [1:1.00] +; ICELAKE-NEXT: kmovd %esi, %k1 ; ICELAKE-NEXT: #APP -; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:1.00] -; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:1.00] -; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] -; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:1.00] -; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:1.00] -; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] -; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:1.00] -; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:1.00] -; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 +; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} +; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} +; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 +; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} +; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} +; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 +; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} +; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} ; ICELAKE-NEXT: #NO_APP -; ICELAKE-NEXT: vzeroupper # sched: [0:0.67] -; ICELAKE-NEXT: retq # sched: [7:1.00] +; ICELAKE-NEXT: vzeroupper +; ICELAKE-NEXT: retq tail call void asm "vpopcntq $1, $0 \0A\09 vpopcntq $1, $0 {$3} \0A\09 vpopcntq $1, $0 {$3} {z} \0A\09 vpopcntq $2, $0 \0A\09 vpopcntq $2, $0 {$3} \0A\09 vpopcntq $2, $0 {$3} {z} \0A\09 vpopcntq $2{1to8}, $0 \0A\09 vpopcntq $2{1to8}, $0 {$3} \0A\09 vpopcntq $2{1to8}, $0 {$3} {z}", "v,v,*m,^Yk"(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> *%a2, i8 %a3) nounwind ret void } Index: test/CodeGen/X86/bmi-schedule.ll =================================================================== --- test/CodeGen/X86/bmi-schedule.ll +++ test/CodeGen/X86/bmi-schedule.ll @@ -1,763 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define i32 @test_andn_i32(i32 %a0, i32 %a1, i32 *%a2) { -; GENERIC-LABEL: test_andn_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: andnl (%rdx), %edi, %eax # sched: [6:0.50] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_andn_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50] -; HASWELL-NEXT: andnl (%rdx), %edi, %eax # sched: [6:0.50] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_andn_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50] -; BROADWELL-NEXT: andnl (%rdx), %edi, %eax # sched: [6:0.50] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_andn_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50] -; SKYLAKE-NEXT: andnl (%rdx), %edi, %eax # sched: [6:0.50] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_andn_i32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: andnl (%rdx), %edi, %eax # sched: [5:0.50] -; BDVER2-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_andn_i32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: andnl (%rdx), %edi, %eax # sched: [4:1.00] -; BTVER2-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50] -; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_andn_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: andnl (%rdx), %edi, %eax # sched: [5:0.50] -; ZNVER1-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.25] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a2 - %2 = xor i32 %a0, -1 - %3 = and i32 %2, %a1 - %4 = and i32 %2, %1 - %5 = add i32 %3, %4 - ret i32 %5 -} - -define i64 @test_andn_i64(i64 %a0, i64 %a1, i64 *%a2) { -; GENERIC-LABEL: test_andn_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: andnq (%rdx), %rdi, %rax # sched: [6:0.50] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_andn_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50] -; HASWELL-NEXT: andnq (%rdx), %rdi, %rax # sched: [6:0.50] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_andn_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50] -; BROADWELL-NEXT: andnq (%rdx), %rdi, %rax # sched: [6:0.50] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_andn_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50] -; SKYLAKE-NEXT: andnq (%rdx), %rdi, %rax # sched: [6:0.50] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_andn_i64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: andnq (%rdx), %rdi, %rax # sched: [5:0.50] -; BDVER2-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_andn_i64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: andnq (%rdx), %rdi, %rax # sched: [4:1.00] -; BTVER2-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50] -; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_andn_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: andnq (%rdx), %rdi, %rax # sched: [5:0.50] -; ZNVER1-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.25] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a2 - %2 = xor i64 %a0, -1 - %3 = and i64 %2, %a1 - %4 = and i64 %2, %1 - %5 = add i64 %3, %4 - ret i64 %5 -} - -define i32 @test_bextr_i32(i32 %a0, i32 %a1, i32 *%a2) { -; GENERIC-LABEL: test_bextr_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: bextrl %edi, (%rdx), %ecx # sched: [7:1.00] -; GENERIC-NEXT: bextrl %edi, %esi, %eax # sched: [2:1.00] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_bextr_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: bextrl %edi, (%rdx), %ecx # sched: [7:0.50] -; HASWELL-NEXT: bextrl %edi, %esi, %eax # sched: [2:0.50] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bextr_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: bextrl %edi, (%rdx), %ecx # sched: [7:0.50] -; BROADWELL-NEXT: bextrl %edi, %esi, %eax # sched: [2:0.50] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_bextr_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: bextrl %edi, (%rdx), %ecx # sched: [7:0.50] -; SKYLAKE-NEXT: bextrl %edi, %esi, %eax # sched: [2:0.50] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_bextr_i32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: bextrl %edi, (%rdx), %ecx # sched: [6:0.50] -; BDVER2-NEXT: bextrl %edi, %esi, %eax # sched: [2:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_bextr_i32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: bextrl %edi, (%rdx), %ecx # sched: [4:1.00] -; BTVER2-NEXT: bextrl %edi, %esi, %eax # sched: [1:0.50] -; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_bextr_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: bextrl %edi, (%rdx), %ecx # sched: [5:0.50] -; ZNVER1-NEXT: bextrl %edi, %esi, %eax # sched: [1:0.25] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a2 - %2 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %1, i32 %a0) - %3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a1, i32 %a0) - %4 = add i32 %2, %3 - ret i32 %4 -} -declare i32 @llvm.x86.bmi.bextr.32(i32, i32) - -define i64 @test_bextr_i64(i64 %a0, i64 %a1, i64 *%a2) { -; GENERIC-LABEL: test_bextr_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [7:1.00] -; GENERIC-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:1.00] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_bextr_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [7:0.50] -; HASWELL-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:0.50] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bextr_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [7:0.50] -; BROADWELL-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:0.50] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_bextr_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [7:0.50] -; SKYLAKE-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:0.50] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_bextr_i64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [6:0.50] -; BDVER2-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_bextr_i64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [4:1.00] -; BTVER2-NEXT: bextrq %rdi, %rsi, %rax # sched: [1:0.50] -; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_bextr_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [5:0.50] -; ZNVER1-NEXT: bextrq %rdi, %rsi, %rax # sched: [1:0.25] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a2 - %2 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %1, i64 %a0) - %3 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a1, i64 %a0) - %4 = add i64 %2, %3 - ret i64 %4 -} -declare i64 @llvm.x86.bmi.bextr.64(i64, i64) - -define i32 @test_blsi_i32(i32 %a0, i32 *%a1) { -; GENERIC-LABEL: test_blsi_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blsil (%rsi), %ecx # sched: [6:0.50] -; GENERIC-NEXT: blsil %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_blsi_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: blsil (%rsi), %ecx # sched: [6:0.50] -; HASWELL-NEXT: blsil %edi, %eax # sched: [1:0.50] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_blsi_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: blsil (%rsi), %ecx # sched: [6:0.50] -; BROADWELL-NEXT: blsil %edi, %eax # sched: [1:0.50] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_blsi_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: blsil (%rsi), %ecx # sched: [6:0.50] -; SKYLAKE-NEXT: blsil %edi, %eax # sched: [1:0.50] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_blsi_i32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blsil (%rsi), %ecx # sched: [6:0.50] -; BDVER2-NEXT: blsil %edi, %eax # sched: [2:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_blsi_i32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: blsil (%rsi), %ecx # sched: [5:1.00] -; BTVER2-NEXT: blsil %edi, %eax # sched: [2:1.00] -; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_blsi_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: blsil (%rsi), %ecx # sched: [6:0.50] -; ZNVER1-NEXT: blsil %edi, %eax # sched: [2:0.25] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a1 - %2 = sub i32 0, %1 - %3 = sub i32 0, %a0 - %4 = and i32 %1, %2 - %5 = and i32 %a0, %3 - %6 = add i32 %4, %5 - ret i32 %6 -} - -define i64 @test_blsi_i64(i64 %a0, i64 *%a1) { -; GENERIC-LABEL: test_blsi_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50] -; GENERIC-NEXT: blsiq %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_blsi_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50] -; HASWELL-NEXT: blsiq %rdi, %rax # sched: [1:0.50] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_blsi_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50] -; BROADWELL-NEXT: blsiq %rdi, %rax # sched: [1:0.50] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_blsi_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50] -; SKYLAKE-NEXT: blsiq %rdi, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_blsi_i64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50] -; BDVER2-NEXT: blsiq %rdi, %rax # sched: [2:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_blsi_i64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: blsiq (%rsi), %rcx # sched: [5:1.00] -; BTVER2-NEXT: blsiq %rdi, %rax # sched: [2:1.00] -; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_blsi_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50] -; ZNVER1-NEXT: blsiq %rdi, %rax # sched: [2:0.25] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a1 - %2 = sub i64 0, %1 - %3 = sub i64 0, %a0 - %4 = and i64 %1, %2 - %5 = and i64 %a0, %3 - %6 = add i64 %4, %5 - ret i64 %6 -} - -define i32 @test_blsmsk_i32(i32 %a0, i32 *%a1) { -; GENERIC-LABEL: test_blsmsk_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50] -; GENERIC-NEXT: blsmskl %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_blsmsk_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50] -; HASWELL-NEXT: blsmskl %edi, %eax # sched: [1:0.50] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_blsmsk_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50] -; BROADWELL-NEXT: blsmskl %edi, %eax # sched: [1:0.50] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_blsmsk_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50] -; SKYLAKE-NEXT: blsmskl %edi, %eax # sched: [1:0.50] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_blsmsk_i32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50] -; BDVER2-NEXT: blsmskl %edi, %eax # sched: [2:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_blsmsk_i32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: blsmskl (%rsi), %ecx # sched: [5:1.00] -; BTVER2-NEXT: blsmskl %edi, %eax # sched: [2:1.00] -; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_blsmsk_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50] -; ZNVER1-NEXT: blsmskl %edi, %eax # sched: [2:0.25] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a1 - %2 = sub i32 %1, 1 - %3 = sub i32 %a0, 1 - %4 = xor i32 %1, %2 - %5 = xor i32 %a0, %3 - %6 = add i32 %4, %5 - ret i32 %6 -} - -define i64 @test_blsmsk_i64(i64 %a0, i64 *%a1) { -; GENERIC-LABEL: test_blsmsk_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50] -; GENERIC-NEXT: blsmskq %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_blsmsk_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50] -; HASWELL-NEXT: blsmskq %rdi, %rax # sched: [1:0.50] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_blsmsk_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50] -; BROADWELL-NEXT: blsmskq %rdi, %rax # sched: [1:0.50] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_blsmsk_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50] -; SKYLAKE-NEXT: blsmskq %rdi, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_blsmsk_i64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50] -; BDVER2-NEXT: blsmskq %rdi, %rax # sched: [2:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_blsmsk_i64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: blsmskq (%rsi), %rcx # sched: [5:1.00] -; BTVER2-NEXT: blsmskq %rdi, %rax # sched: [2:1.00] -; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_blsmsk_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50] -; ZNVER1-NEXT: blsmskq %rdi, %rax # sched: [2:0.25] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a1 - %2 = sub i64 %1, 1 - %3 = sub i64 %a0, 1 - %4 = xor i64 %1, %2 - %5 = xor i64 %a0, %3 - %6 = add i64 %4, %5 - ret i64 %6 -} - -define i32 @test_blsr_i32(i32 %a0, i32 *%a1) { -; GENERIC-LABEL: test_blsr_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50] -; GENERIC-NEXT: blsrl %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_blsr_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50] -; HASWELL-NEXT: blsrl %edi, %eax # sched: [1:0.50] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_blsr_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50] -; BROADWELL-NEXT: blsrl %edi, %eax # sched: [1:0.50] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_blsr_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50] -; SKYLAKE-NEXT: blsrl %edi, %eax # sched: [1:0.50] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_blsr_i32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50] -; BDVER2-NEXT: blsrl %edi, %eax # sched: [2:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_blsr_i32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: blsrl (%rsi), %ecx # sched: [5:1.00] -; BTVER2-NEXT: blsrl %edi, %eax # sched: [2:1.00] -; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_blsr_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50] -; ZNVER1-NEXT: blsrl %edi, %eax # sched: [2:0.25] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a1 - %2 = sub i32 %1, 1 - %3 = sub i32 %a0, 1 - %4 = and i32 %1, %2 - %5 = and i32 %a0, %3 - %6 = add i32 %4, %5 - ret i32 %6 -} - -define i64 @test_blsr_i64(i64 %a0, i64 *%a1) { -; GENERIC-LABEL: test_blsr_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50] -; GENERIC-NEXT: blsrq %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_blsr_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50] -; HASWELL-NEXT: blsrq %rdi, %rax # sched: [1:0.50] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_blsr_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50] -; BROADWELL-NEXT: blsrq %rdi, %rax # sched: [1:0.50] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_blsr_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50] -; SKYLAKE-NEXT: blsrq %rdi, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_blsr_i64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50] -; BDVER2-NEXT: blsrq %rdi, %rax # sched: [2:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_blsr_i64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: blsrq (%rsi), %rcx # sched: [5:1.00] -; BTVER2-NEXT: blsrq %rdi, %rax # sched: [2:1.00] -; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_blsr_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50] -; ZNVER1-NEXT: blsrq %rdi, %rax # sched: [2:0.25] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a1 - %2 = sub i64 %1, 1 - %3 = sub i64 %a0, 1 - %4 = and i64 %1, %2 - %5 = and i64 %a0, %3 - %6 = add i64 %4, %5 - ret i64 %6 -} - -define i16 @test_cttz_i16(i16 zeroext %a0, i16 *%a1) { -; GENERIC-LABEL: test_cttz_i16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: tzcntw (%rsi), %cx # sched: [8:1.00] -; GENERIC-NEXT: tzcntw %di, %ax # sched: [3:1.00] -; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cttz_i16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: tzcntw (%rsi), %cx # sched: [8:1.00] -; HASWELL-NEXT: tzcntw %di, %ax # sched: [3:1.00] -; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cttz_i16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: tzcntw (%rsi), %cx # sched: [8:1.00] -; BROADWELL-NEXT: tzcntw %di, %ax # sched: [3:1.00] -; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cttz_i16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: tzcntw (%rsi), %cx # sched: [8:1.00] -; SKYLAKE-NEXT: tzcntw %di, %ax # sched: [3:1.00] -; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cttz_i16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: tzcntw (%rsi), %cx # sched: [6:1.00] -; BDVER2-NEXT: tzcntw %di, %ax # sched: [2:1.00] -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cttz_i16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: tzcntw (%rsi), %cx # sched: [5:1.00] -; BTVER2-NEXT: tzcntw %di, %ax # sched: [2:1.00] -; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cttz_i16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: tzcntw (%rsi), %cx # sched: [6:0.50] -; ZNVER1-NEXT: tzcntw %di, %ax # sched: [2:0.25] -; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: # kill: def $ax killed $ax killed $eax -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i16, i16 *%a1 - %2 = tail call i16 @llvm.cttz.i16( i16 %1, i1 false ) - %3 = tail call i16 @llvm.cttz.i16( i16 %a0, i1 false ) - %4 = or i16 %2, %3 - ret i16 %4 -} -declare i16 @llvm.cttz.i16(i16, i1) - -define i32 @test_cttz_i32(i32 %a0, i32 *%a1) { -; GENERIC-LABEL: test_cttz_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: tzcntl (%rsi), %ecx # sched: [8:1.00] -; GENERIC-NEXT: tzcntl %edi, %eax # sched: [3:1.00] -; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cttz_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: tzcntl (%rsi), %ecx # sched: [8:1.00] -; HASWELL-NEXT: tzcntl %edi, %eax # sched: [3:1.00] -; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cttz_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: tzcntl (%rsi), %ecx # sched: [8:1.00] -; BROADWELL-NEXT: tzcntl %edi, %eax # sched: [3:1.00] -; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cttz_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: tzcntl (%rsi), %ecx # sched: [8:1.00] -; SKYLAKE-NEXT: tzcntl %edi, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cttz_i32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: tzcntl (%rsi), %ecx # sched: [6:1.00] -; BDVER2-NEXT: tzcntl %edi, %eax # sched: [2:1.00] -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cttz_i32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: tzcntl (%rsi), %ecx # sched: [5:1.00] -; BTVER2-NEXT: tzcntl %edi, %eax # sched: [2:1.00] -; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cttz_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: tzcntl (%rsi), %ecx # sched: [6:0.50] -; ZNVER1-NEXT: tzcntl %edi, %eax # sched: [2:0.25] -; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a1 - %2 = tail call i32 @llvm.cttz.i32( i32 %1, i1 false ) - %3 = tail call i32 @llvm.cttz.i32( i32 %a0, i1 false ) - %4 = or i32 %2, %3 - ret i32 %4 -} -declare i32 @llvm.cttz.i32(i32, i1) - -define i64 @test_cttz_i64(i64 %a0, i64 *%a1) { -; GENERIC-LABEL: test_cttz_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: tzcntq (%rsi), %rcx # sched: [8:1.00] -; GENERIC-NEXT: tzcntq %rdi, %rax # sched: [3:1.00] -; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cttz_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: tzcntq (%rsi), %rcx # sched: [8:1.00] -; HASWELL-NEXT: tzcntq %rdi, %rax # sched: [3:1.00] -; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cttz_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: tzcntq (%rsi), %rcx # sched: [8:1.00] -; BROADWELL-NEXT: tzcntq %rdi, %rax # sched: [3:1.00] -; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cttz_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: tzcntq (%rsi), %rcx # sched: [8:1.00] -; SKYLAKE-NEXT: tzcntq %rdi, %rax # sched: [3:1.00] -; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cttz_i64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: tzcntq (%rsi), %rcx # sched: [6:1.00] -; BDVER2-NEXT: tzcntq %rdi, %rax # sched: [2:1.00] -; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cttz_i64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: tzcntq (%rsi), %rcx # sched: [5:1.00] -; BTVER2-NEXT: tzcntq %rdi, %rax # sched: [2:1.00] -; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cttz_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: tzcntq (%rsi), %rcx # sched: [6:0.50] -; ZNVER1-NEXT: tzcntq %rdi, %rax # sched: [2:0.25] -; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a1 - %2 = tail call i64 @llvm.cttz.i64( i64 %1, i1 false ) - %3 = tail call i64 @llvm.cttz.i64( i64 %a0, i1 false ) - %4 = or i64 %2, %3 - ret i64 %4 -} -declare i64 @llvm.cttz.i64(i64, i1) Index: test/CodeGen/X86/bmi2-schedule.ll =================================================================== --- test/CodeGen/X86/bmi2-schedule.ll +++ test/CodeGen/X86/bmi2-schedule.ll @@ -1,811 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define i32 @test_bzhi_i32(i32 %a0, i32 %a1, i32 *%a2) { -; GENERIC-LABEL: test_bzhi_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: bzhil %edi, (%rdx), %ecx # sched: [6:1.00] -; GENERIC-NEXT: bzhil %edi, %esi, %eax # sched: [1:1.00] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_bzhi_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: bzhil %edi, (%rdx), %ecx # sched: [6:0.50] -; HASWELL-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.50] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bzhi_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: bzhil %edi, (%rdx), %ecx # sched: [6:0.50] -; BROADWELL-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.50] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_bzhi_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: bzhil %edi, (%rdx), %ecx # sched: [6:0.50] -; SKYLAKE-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.50] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_bzhi_i32: -; KNL: # %bb.0: -; KNL-NEXT: bzhil %edi, (%rdx), %ecx # sched: [6:0.50] -; KNL-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.50] -; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_bzhi_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: bzhil %edi, (%rdx), %ecx # sched: [5:0.50] -; ZNVER1-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.25] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a2 - %2 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %1, i32 %a0) - %3 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %a1, i32 %a0) - %4 = add i32 %2, %3 - ret i32 %4 -} -declare i32 @llvm.x86.bmi.bzhi.32(i32, i32) - -define i64 @test_bzhi_i64(i64 %a0, i64 %a1, i64 *%a2) { -; GENERIC-LABEL: test_bzhi_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [6:1.00] -; GENERIC-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:1.00] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_bzhi_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [6:0.50] -; HASWELL-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bzhi_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [6:0.50] -; BROADWELL-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_bzhi_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [6:0.50] -; SKYLAKE-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_bzhi_i64: -; KNL: # %bb.0: -; KNL-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [6:0.50] -; KNL-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50] -; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_bzhi_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [5:0.50] -; ZNVER1-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.25] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a2 - %2 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %1, i64 %a0) - %3 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %a1, i64 %a0) - %4 = add i64 %2, %3 - ret i64 %4 -} -declare i64 @llvm.x86.bmi.bzhi.64(i64, i64) - -define void @test_mulx_i32(i32 %a0, i32 %a1, i32* %a2) optsize { -; GENERIC-LABEL: test_mulx_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00] -; GENERIC-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_mulx_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00] -; HASWELL-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mulx_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00] -; BROADWELL-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mulx_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00] -; SKYLAKE-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_mulx_i32: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00] -; KNL-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_mulx_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: mulxl %esi, %esi, %edi # sched: [3:2.00] -; ZNVER1-NEXT: mulxl (%rdx), %esi, %edi # sched: [8:2.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "mulx $1, $1, $0 \0A\09 mulx $2, $1, $0 ", "r,r,*m"(i32 %a0, i32 %a1, i32* %a2) nounwind - ret void -} - -define void @test_mulx_i64(i64 %a0, i64 %a1, i64 *%a2) { -; GENERIC-LABEL: test_mulx_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00] -; GENERIC-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_mulx_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00] -; HASWELL-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mulx_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00] -; BROADWELL-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mulx_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00] -; SKYLAKE-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_mulx_i64: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00] -; KNL-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_mulx_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: mulxq %rsi, %rsi, %rdi # sched: [3:1.00] -; ZNVER1-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [8:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "mulx $1, $1, $0 \0A\09 mulx $2, $1, $0 ", "r,r,*m"(i64 %a0, i64 %a1, i64* %a2) nounwind - ret void -} - -define i32 @test_pdep_i32(i32 %a0, i32 %a1, i32 *%a2) { -; GENERIC-LABEL: test_pdep_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pdepl (%rdx), %edi, %ecx # sched: [6:0.50] -; GENERIC-NEXT: pdepl %esi, %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pdep_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pdepl (%rdx), %edi, %ecx # sched: [8:1.00] -; HASWELL-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pdep_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pdepl (%rdx), %edi, %ecx # sched: [8:1.00] -; BROADWELL-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pdep_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pdepl (%rdx), %edi, %ecx # sched: [8:1.00] -; SKYLAKE-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_pdep_i32: -; KNL: # %bb.0: -; KNL-NEXT: pdepl (%rdx), %edi, %ecx # sched: [8:1.00] -; KNL-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00] -; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pdep_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pdepl (%rdx), %edi, %ecx # sched: [100:0.25] -; ZNVER1-NEXT: pdepl %esi, %edi, %eax # sched: [100:0.25] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a2 - %2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %a0, i32 %1) - %3 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %a0, i32 %a1) - %4 = add i32 %2, %3 - ret i32 %4 -} -declare i32 @llvm.x86.bmi.pdep.32(i32, i32) - -define i64 @test_pdep_i64(i64 %a0, i64 %a1, i64 *%a2) { -; GENERIC-LABEL: test_pdep_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [6:0.50] -; GENERIC-NEXT: pdepq %rsi, %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pdep_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [8:1.00] -; HASWELL-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pdep_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [8:1.00] -; BROADWELL-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pdep_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [8:1.00] -; SKYLAKE-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_pdep_i64: -; KNL: # %bb.0: -; KNL-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [8:1.00] -; KNL-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00] -; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pdep_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [100:0.25] -; ZNVER1-NEXT: pdepq %rsi, %rdi, %rax # sched: [100:0.25] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a2 - %2 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %a0, i64 %1) - %3 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %a0, i64 %a1) - %4 = add i64 %2, %3 - ret i64 %4 -} -declare i64 @llvm.x86.bmi.pdep.64(i64, i64) - -define i32 @test_pext_i32(i32 %a0, i32 %a1, i32 *%a2) { -; GENERIC-LABEL: test_pext_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pextl (%rdx), %edi, %ecx # sched: [6:0.50] -; GENERIC-NEXT: pextl %esi, %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pext_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pextl (%rdx), %edi, %ecx # sched: [8:1.00] -; HASWELL-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pext_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pextl (%rdx), %edi, %ecx # sched: [8:1.00] -; BROADWELL-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pext_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pextl (%rdx), %edi, %ecx # sched: [8:1.00] -; SKYLAKE-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_pext_i32: -; KNL: # %bb.0: -; KNL-NEXT: pextl (%rdx), %edi, %ecx # sched: [8:1.00] -; KNL-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00] -; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pext_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pextl (%rdx), %edi, %ecx # sched: [100:0.25] -; ZNVER1-NEXT: pextl %esi, %edi, %eax # sched: [100:0.25] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a2 - %2 = tail call i32 @llvm.x86.bmi.pext.32(i32 %a0, i32 %1) - %3 = tail call i32 @llvm.x86.bmi.pext.32(i32 %a0, i32 %a1) - %4 = add i32 %2, %3 - ret i32 %4 -} -declare i32 @llvm.x86.bmi.pext.32(i32, i32) - -define i64 @test_pext_i64(i64 %a0, i64 %a1, i64 *%a2) { -; GENERIC-LABEL: test_pext_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pextq (%rdx), %rdi, %rcx # sched: [6:0.50] -; GENERIC-NEXT: pextq %rsi, %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pext_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pextq (%rdx), %rdi, %rcx # sched: [8:1.00] -; HASWELL-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pext_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pextq (%rdx), %rdi, %rcx # sched: [8:1.00] -; BROADWELL-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pext_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pextq (%rdx), %rdi, %rcx # sched: [8:1.00] -; SKYLAKE-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_pext_i64: -; KNL: # %bb.0: -; KNL-NEXT: pextq (%rdx), %rdi, %rcx # sched: [8:1.00] -; KNL-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00] -; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_pext_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pextq (%rdx), %rdi, %rcx # sched: [100:0.25] -; ZNVER1-NEXT: pextq %rsi, %rdi, %rax # sched: [100:0.25] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a2 - %2 = tail call i64 @llvm.x86.bmi.pext.64(i64 %a0, i64 %1) - %3 = tail call i64 @llvm.x86.bmi.pext.64(i64 %a0, i64 %a1) - %4 = add i64 %2, %3 - ret i64 %4 -} -declare i64 @llvm.x86.bmi.pext.64(i64, i64) - -define i32 @test_rorx_i32(i32 %a0, i32 %a1, i32 *%a2) { -; GENERIC-LABEL: test_rorx_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50] -; GENERIC-NEXT: rorxl $5, (%rdx), %eax # sched: [6:0.50] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rorx_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50] -; HASWELL-NEXT: rorxl $5, (%rdx), %eax # sched: [6:0.50] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rorx_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50] -; BROADWELL-NEXT: rorxl $5, (%rdx), %eax # sched: [6:0.50] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rorx_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50] -; SKYLAKE-NEXT: rorxl $5, (%rdx), %eax # sched: [6:0.50] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_rorx_i32: -; KNL: # %bb.0: -; KNL-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50] -; KNL-NEXT: rorxl $5, (%rdx), %eax # sched: [6:0.50] -; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_rorx_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: rorxl $5, (%rdx), %eax # sched: [5:0.50] -; ZNVER1-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.25] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a2 - %2 = lshr i32 %a0, 5 - %3 = shl i32 %a0, 27 - %4 = or i32 %2, %3 - %5 = lshr i32 %1, 5 - %6 = shl i32 %1, 27 - %7 = or i32 %5, %6 - %8 = add i32 %4, %7 - ret i32 %8 -} - -define i64 @test_rorx_i64(i64 %a0, i64 %a1, i64 *%a2) { -; GENERIC-LABEL: test_rorx_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50] -; GENERIC-NEXT: rorxq $5, (%rdx), %rax # sched: [6:0.50] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rorx_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50] -; HASWELL-NEXT: rorxq $5, (%rdx), %rax # sched: [6:0.50] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rorx_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50] -; BROADWELL-NEXT: rorxq $5, (%rdx), %rax # sched: [6:0.50] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rorx_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50] -; SKYLAKE-NEXT: rorxq $5, (%rdx), %rax # sched: [6:0.50] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_rorx_i64: -; KNL: # %bb.0: -; KNL-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50] -; KNL-NEXT: rorxq $5, (%rdx), %rax # sched: [6:0.50] -; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_rorx_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: rorxq $5, (%rdx), %rax # sched: [5:0.50] -; ZNVER1-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.25] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a2 - %2 = lshr i64 %a0, 5 - %3 = shl i64 %a0, 59 - %4 = or i64 %2, %3 - %5 = lshr i64 %1, 5 - %6 = shl i64 %1, 59 - %7 = or i64 %5, %6 - %8 = add i64 %4, %7 - ret i64 %8 -} - -define i32 @test_sarx_i32(i32 %a0, i32 %a1, i32 *%a2) { -; GENERIC-LABEL: test_sarx_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50] -; GENERIC-NEXT: sarxl %esi, (%rdx), %eax # sched: [6:0.50] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sarx_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50] -; HASWELL-NEXT: sarxl %esi, (%rdx), %eax # sched: [6:0.50] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sarx_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50] -; BROADWELL-NEXT: sarxl %esi, (%rdx), %eax # sched: [6:0.50] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sarx_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50] -; SKYLAKE-NEXT: sarxl %esi, (%rdx), %eax # sched: [6:0.50] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_sarx_i32: -; KNL: # %bb.0: -; KNL-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50] -; KNL-NEXT: sarxl %esi, (%rdx), %eax # sched: [6:0.50] -; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_sarx_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: sarxl %esi, (%rdx), %eax # sched: [5:0.50] -; ZNVER1-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.25] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a2 - %2 = ashr i32 %a0, %a1 - %3 = ashr i32 %1, %a1 - %4 = add i32 %2, %3 - ret i32 %4 -} - -define i64 @test_sarx_i64(i64 %a0, i64 %a1, i64 *%a2) { -; GENERIC-LABEL: test_sarx_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50] -; GENERIC-NEXT: sarxq %rsi, (%rdx), %rax # sched: [6:0.50] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sarx_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50] -; HASWELL-NEXT: sarxq %rsi, (%rdx), %rax # sched: [6:0.50] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sarx_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50] -; BROADWELL-NEXT: sarxq %rsi, (%rdx), %rax # sched: [6:0.50] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sarx_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50] -; SKYLAKE-NEXT: sarxq %rsi, (%rdx), %rax # sched: [6:0.50] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_sarx_i64: -; KNL: # %bb.0: -; KNL-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50] -; KNL-NEXT: sarxq %rsi, (%rdx), %rax # sched: [6:0.50] -; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_sarx_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: sarxq %rsi, (%rdx), %rax # sched: [5:0.50] -; ZNVER1-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.25] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a2 - %2 = ashr i64 %a0, %a1 - %3 = ashr i64 %1, %a1 - %4 = add i64 %2, %3 - ret i64 %4 -} - -define i32 @test_shlx_i32(i32 %a0, i32 %a1, i32 *%a2) { -; GENERIC-LABEL: test_shlx_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50] -; GENERIC-NEXT: shlxl %esi, (%rdx), %eax # sched: [6:0.50] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_shlx_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50] -; HASWELL-NEXT: shlxl %esi, (%rdx), %eax # sched: [6:0.50] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_shlx_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50] -; BROADWELL-NEXT: shlxl %esi, (%rdx), %eax # sched: [6:0.50] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_shlx_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50] -; SKYLAKE-NEXT: shlxl %esi, (%rdx), %eax # sched: [6:0.50] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_shlx_i32: -; KNL: # %bb.0: -; KNL-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50] -; KNL-NEXT: shlxl %esi, (%rdx), %eax # sched: [6:0.50] -; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_shlx_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: shlxl %esi, (%rdx), %eax # sched: [5:0.50] -; ZNVER1-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.25] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a2 - %2 = shl i32 %a0, %a1 - %3 = shl i32 %1, %a1 - %4 = add i32 %2, %3 - ret i32 %4 -} - -define i64 @test_shlx_i64(i64 %a0, i64 %a1, i64 *%a2) { -; GENERIC-LABEL: test_shlx_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50] -; GENERIC-NEXT: shlxq %rsi, (%rdx), %rax # sched: [6:0.50] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_shlx_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50] -; HASWELL-NEXT: shlxq %rsi, (%rdx), %rax # sched: [6:0.50] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_shlx_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50] -; BROADWELL-NEXT: shlxq %rsi, (%rdx), %rax # sched: [6:0.50] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_shlx_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50] -; SKYLAKE-NEXT: shlxq %rsi, (%rdx), %rax # sched: [6:0.50] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_shlx_i64: -; KNL: # %bb.0: -; KNL-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50] -; KNL-NEXT: shlxq %rsi, (%rdx), %rax # sched: [6:0.50] -; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_shlx_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: shlxq %rsi, (%rdx), %rax # sched: [5:0.50] -; ZNVER1-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.25] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a2 - %2 = shl i64 %a0, %a1 - %3 = shl i64 %1, %a1 - %4 = add i64 %2, %3 - ret i64 %4 -} - -define i32 @test_shrx_i32(i32 %a0, i32 %a1, i32 *%a2) { -; GENERIC-LABEL: test_shrx_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50] -; GENERIC-NEXT: shrxl %esi, (%rdx), %eax # sched: [6:0.50] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_shrx_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50] -; HASWELL-NEXT: shrxl %esi, (%rdx), %eax # sched: [6:0.50] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_shrx_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50] -; BROADWELL-NEXT: shrxl %esi, (%rdx), %eax # sched: [6:0.50] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_shrx_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50] -; SKYLAKE-NEXT: shrxl %esi, (%rdx), %eax # sched: [6:0.50] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_shrx_i32: -; KNL: # %bb.0: -; KNL-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50] -; KNL-NEXT: shrxl %esi, (%rdx), %eax # sched: [6:0.50] -; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_shrx_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: shrxl %esi, (%rdx), %eax # sched: [5:0.50] -; ZNVER1-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.25] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a2 - %2 = lshr i32 %a0, %a1 - %3 = lshr i32 %1, %a1 - %4 = add i32 %2, %3 - ret i32 %4 -} - -define i64 @test_shrx_i64(i64 %a0, i64 %a1, i64 *%a2) { -; GENERIC-LABEL: test_shrx_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50] -; GENERIC-NEXT: shrxq %rsi, (%rdx), %rax # sched: [6:0.50] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_shrx_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50] -; HASWELL-NEXT: shrxq %rsi, (%rdx), %rax # sched: [6:0.50] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_shrx_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50] -; BROADWELL-NEXT: shrxq %rsi, (%rdx), %rax # sched: [6:0.50] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_shrx_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50] -; SKYLAKE-NEXT: shrxq %rsi, (%rdx), %rax # sched: [6:0.50] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_shrx_i64: -; KNL: # %bb.0: -; KNL-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50] -; KNL-NEXT: shrxq %rsi, (%rdx), %rax # sched: [6:0.50] -; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_shrx_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: shrxq %rsi, (%rdx), %rax # sched: [5:0.50] -; ZNVER1-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.25] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a2 - %2 = lshr i64 %a0, %a1 - %3 = lshr i64 %1, %a1 - %4 = add i64 %2, %3 - ret i64 %4 -} Index: test/CodeGen/X86/clflushopt-schedule.ll =================================================================== --- test/CodeGen/X86/clflushopt-schedule.ll +++ test/CodeGen/X86/clflushopt-schedule.ll @@ -1,36 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+clflushopt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=GLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define void @clflushopt(i8* %p) nounwind { -; GENERIC-LABEL: clflushopt: -; GENERIC: # %bb.0: -; GENERIC-NEXT: clflushopt (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GLM-LABEL: clflushopt: -; GLM: # %bb.0: -; GLM-NEXT: clflushopt (%rdi) # sched: [3:1.00] -; GLM-NEXT: retq # sched: [4:1.00] -; -; SKYLAKE-LABEL: clflushopt: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: clflushopt (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: clflushopt: -; SKX: # %bb.0: -; SKX-NEXT: clflushopt (%rdi) # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: clflushopt: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: clflushopt (%rdi) # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void @llvm.x86.clflushopt(i8* %p) - ret void -} -declare void @llvm.x86.clflushopt(i8*) nounwind Index: test/CodeGen/X86/clwb-schedule.ll =================================================================== --- test/CodeGen/X86/clwb-schedule.ll +++ test/CodeGen/X86/clwb-schedule.ll @@ -1,18 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+clwb | FileCheck %s --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX - -define void @clwb(i8* %a0) nounwind { -; GENERIC-LABEL: clwb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: clwb (%rdi) # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SKX-LABEL: clwb: -; SKX: # %bb.0: -; SKX-NEXT: clwb (%rdi) # sched: [5:0.50] -; SKX-NEXT: retq # sched: [7:1.00] - tail call void @llvm.x86.clwb(i8* %a0) - ret void -} -declare void @llvm.x86.clwb(i8*) nounwind Index: test/CodeGen/X86/clzero-schedule.ll =================================================================== --- test/CodeGen/X86/clzero-schedule.ll +++ test/CodeGen/X86/clzero-schedule.ll @@ -1,20 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+clzero | FileCheck %s --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1 - -define void @test_clzero(i8* %p) { -; GENERIC-LABEL: test_clzero: -; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; GENERIC-NEXT: clzero # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ZNVER1-LABEL: test_clzero: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] -; ZNVER1-NEXT: clzero # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void @llvm.x86.clzero(i8* %p) - ret void -} -declare void @llvm.x86.clzero(i8*) Index: test/CodeGen/X86/cmov-schedule.ll =================================================================== --- test/CodeGen/X86/cmov-schedule.ll +++ test/CodeGen/X86/cmov-schedule.ll @@ -1,2203 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define void @test_cmov_16(i16 %a0, i16 %a1, i16 *%a2) optsize { -; GENERIC-LABEL: test_cmov_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: cmovow %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovnow %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovbw %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovbw %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovbw %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovaew %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovaew %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovaew %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovew %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovew %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovnew %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovnew %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovbew %si, %di # sched: [3:1.00] -; GENERIC-NEXT: cmovbew %si, %di # sched: [3:1.00] -; GENERIC-NEXT: cmovaw %si, %di # sched: [3:1.00] -; GENERIC-NEXT: cmovaw %si, %di # sched: [3:1.00] -; GENERIC-NEXT: cmovsw %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovnsw %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovpw %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovpw %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovnpw %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovnpw %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovlw %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovlw %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovgew %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovgew %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovlew %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovlew %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovgw %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovgw %si, %di # sched: [2:0.67] -; GENERIC-NEXT: cmovow (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovnow (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovbw (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovbw (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovbw (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovaew (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovaew (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovaew (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovew (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovew (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovnew (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovnew (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovbew (%rdx), %di # sched: [8:1.00] -; GENERIC-NEXT: cmovbew (%rdx), %di # sched: [8:1.00] -; GENERIC-NEXT: cmovaw (%rdx), %di # sched: [8:1.00] -; GENERIC-NEXT: cmovaw (%rdx), %di # sched: [8:1.00] -; GENERIC-NEXT: cmovsw (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovnsw (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovpw (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovpw (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovlw (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovlw (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovgew (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovgew (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovlew (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovlew (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovgw (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: cmovgw (%rdx), %di # sched: [7:0.67] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmov_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: cmovow %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovnow %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovbw %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovbw %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovbw %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovaew %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovaew %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovaew %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovew %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovew %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovnew %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovnew %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovbew %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovbew %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovaw %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovaw %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovsw %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovnsw %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovpw %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovpw %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovnpw %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovnpw %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovlw %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovlw %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovgew %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovgew %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovlew %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovlew %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovgw %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovgw %si, %di # sched: [1:0.50] -; ATOM-NEXT: cmovow (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovnow (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovbw (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovbw (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovbw (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovaew (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovaew (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovaew (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovew (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovew (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovnew (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovnew (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovbew (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovbew (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovaw (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovaw (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovsw (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovnsw (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovpw (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovpw (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovnpw (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovnpw (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovlw (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovlw (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovgew (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovgew (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovlew (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovlew (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovgw (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: cmovgw (%rdx), %di # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmov_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: cmovow %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovnow %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovbw %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovbw %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovbw %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovaew %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovaew %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovaew %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovew %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovew %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovnew %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovnew %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovbew %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovbew %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovaw %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovaw %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovsw %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovnsw %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovpw %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovpw %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovnpw %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovnpw %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovlw %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovlw %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovgew %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovgew %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovlew %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovlew %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovgw %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovgw %si, %di # sched: [2:1.00] -; SLM-NEXT: cmovow (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovnow (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovbw (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovbw (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovbw (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovaew (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovaew (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovaew (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovew (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovew (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovnew (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovnew (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovbew (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovbew (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovaw (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovaw (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovsw (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovnsw (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovpw (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovpw (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovnpw (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovnpw (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovlw (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovlw (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovgew (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovgew (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovlew (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovlew (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovgw (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: cmovgw (%rdx), %di # sched: [5:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cmov_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: cmovow %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovnow %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovbw %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovbw %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovbw %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovaew %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovaew %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovaew %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovew %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovew %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovnew %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovnew %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovbew %si, %di # sched: [3:1.00] -; SANDY-NEXT: cmovbew %si, %di # sched: [3:1.00] -; SANDY-NEXT: cmovaw %si, %di # sched: [3:1.00] -; SANDY-NEXT: cmovaw %si, %di # sched: [3:1.00] -; SANDY-NEXT: cmovsw %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovnsw %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovpw %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovpw %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovnpw %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovnpw %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovlw %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovlw %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovgew %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovgew %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovlew %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovlew %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovgw %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovgw %si, %di # sched: [2:0.67] -; SANDY-NEXT: cmovow (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovnow (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovbw (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovbw (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovbw (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovaew (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovaew (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovaew (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovew (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovew (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovnew (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovnew (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovbew (%rdx), %di # sched: [8:1.00] -; SANDY-NEXT: cmovbew (%rdx), %di # sched: [8:1.00] -; SANDY-NEXT: cmovaw (%rdx), %di # sched: [8:1.00] -; SANDY-NEXT: cmovaw (%rdx), %di # sched: [8:1.00] -; SANDY-NEXT: cmovsw (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovnsw (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovpw (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovpw (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovlw (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovlw (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovgew (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovgew (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovlew (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovlew (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovgw (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: cmovgw (%rdx), %di # sched: [7:0.67] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cmov_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: cmovow %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovnow %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovbw %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovbw %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovbw %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovaew %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovaew %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovaew %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovew %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovew %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovnew %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovnew %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovbew %si, %di # sched: [3:0.75] -; HASWELL-NEXT: cmovbew %si, %di # sched: [3:0.75] -; HASWELL-NEXT: cmovaw %si, %di # sched: [3:0.75] -; HASWELL-NEXT: cmovaw %si, %di # sched: [3:0.75] -; HASWELL-NEXT: cmovsw %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovnsw %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovpw %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovpw %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovnpw %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovnpw %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovlw %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovlw %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovgew %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovgew %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovlew %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovlew %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovgw %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovgw %si, %di # sched: [2:0.50] -; HASWELL-NEXT: cmovow (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovnow (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovbw (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovbw (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovbw (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovaew (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovaew (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovaew (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovew (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovew (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovnew (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovnew (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovbew (%rdx), %di # sched: [8:0.75] -; HASWELL-NEXT: cmovbew (%rdx), %di # sched: [8:0.75] -; HASWELL-NEXT: cmovaw (%rdx), %di # sched: [8:0.75] -; HASWELL-NEXT: cmovaw (%rdx), %di # sched: [8:0.75] -; HASWELL-NEXT: cmovsw (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovnsw (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovpw (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovpw (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovnpw (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovnpw (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovlw (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovlw (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovgew (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovgew (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovlew (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovlew (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovgw (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: cmovgw (%rdx), %di # sched: [7:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmov_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: cmovow %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovnow %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovbw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovbw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovbw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovaew %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovaew %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovaew %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovew %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovew %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovnew %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovnew %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovbew %si, %di # sched: [2:0.50] -; BROADWELL-NEXT: cmovbew %si, %di # sched: [2:0.50] -; BROADWELL-NEXT: cmovaw %si, %di # sched: [2:0.50] -; BROADWELL-NEXT: cmovaw %si, %di # sched: [2:0.50] -; BROADWELL-NEXT: cmovsw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovnsw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovpw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovpw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovnpw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovnpw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovlw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovlw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovgew %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovgew %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovlew %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovlew %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovgw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovgw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: cmovow (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovnow (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovbw (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovbw (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovbw (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovaew (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovaew (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovaew (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovew (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovew (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovnew (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovnew (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovbew (%rdx), %di # sched: [7:0.50] -; BROADWELL-NEXT: cmovbew (%rdx), %di # sched: [7:0.50] -; BROADWELL-NEXT: cmovaw (%rdx), %di # sched: [7:0.50] -; BROADWELL-NEXT: cmovaw (%rdx), %di # sched: [7:0.50] -; BROADWELL-NEXT: cmovsw (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovnsw (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovpw (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovpw (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovnpw (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovnpw (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovlw (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovlw (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovgew (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovgew (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovlew (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovlew (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovgw (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: cmovgw (%rdx), %di # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmov_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: cmovow %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnow %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovbw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovbw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovbw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovaew %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovaew %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovaew %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovew %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovew %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnew %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnew %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovbew %si, %di # sched: [2:1.00] -; SKYLAKE-NEXT: cmovbew %si, %di # sched: [2:1.00] -; SKYLAKE-NEXT: cmovaw %si, %di # sched: [2:1.00] -; SKYLAKE-NEXT: cmovaw %si, %di # sched: [2:1.00] -; SKYLAKE-NEXT: cmovsw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnsw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovpw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovpw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnpw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnpw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovlw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovlw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovgew %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovgew %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovlew %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovlew %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovgw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovgw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: cmovow (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnow (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovbw (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovbw (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovbw (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovaew (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovaew (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovaew (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovew (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovew (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnew (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnew (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovbew (%rdx), %di # sched: [7:1.00] -; SKYLAKE-NEXT: cmovbew (%rdx), %di # sched: [7:1.00] -; SKYLAKE-NEXT: cmovaw (%rdx), %di # sched: [7:1.00] -; SKYLAKE-NEXT: cmovaw (%rdx), %di # sched: [7:1.00] -; SKYLAKE-NEXT: cmovsw (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnsw (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovpw (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovpw (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnpw (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnpw (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovlw (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovlw (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovgew (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovgew (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovlew (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovlew (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovgw (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: cmovgw (%rdx), %di # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmov_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: cmovow %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovnow %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovbw %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovbw %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovbw %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovaew %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovaew %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovaew %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovew %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovew %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovnew %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovnew %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovbew %si, %di # sched: [2:1.00] -; SKX-NEXT: cmovbew %si, %di # sched: [2:1.00] -; SKX-NEXT: cmovaw %si, %di # sched: [2:1.00] -; SKX-NEXT: cmovaw %si, %di # sched: [2:1.00] -; SKX-NEXT: cmovsw %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovnsw %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovpw %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovpw %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovnpw %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovnpw %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovlw %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovlw %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovgew %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovgew %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovlew %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovlew %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovgw %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovgw %si, %di # sched: [1:0.50] -; SKX-NEXT: cmovow (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovnow (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovbw (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovbw (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovbw (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovaew (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovaew (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovaew (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovew (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovew (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovnew (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovnew (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovbew (%rdx), %di # sched: [7:1.00] -; SKX-NEXT: cmovbew (%rdx), %di # sched: [7:1.00] -; SKX-NEXT: cmovaw (%rdx), %di # sched: [7:1.00] -; SKX-NEXT: cmovaw (%rdx), %di # sched: [7:1.00] -; SKX-NEXT: cmovsw (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovnsw (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovpw (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovpw (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovnpw (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovnpw (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovlw (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovlw (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovgew (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovgew (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovlew (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovlew (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovgw (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: cmovgw (%rdx), %di # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cmov_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmovow %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovnow %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovbw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovbw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovbw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovaew %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovaew %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovaew %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovew %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovew %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovnew %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovnew %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovbew %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovbew %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovaw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovaw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovsw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovnsw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovpw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovpw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovnpw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovnpw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovlw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovlw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovgew %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovgew %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovlew %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovlew %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovgw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovgw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: cmovow (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovnow (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovew (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovew (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovnew (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovnew (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovbew (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovbew (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovaw (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovaw (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovsw (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovnsw (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovpw (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovpw (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovnpw (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovnpw (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovlw (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovlw (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovgew (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovgew (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovlew (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovlew (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovgw (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: cmovgw (%rdx), %di # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cmov_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: cmovow %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovnow %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovbw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovbw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovbw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovaew %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovaew %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovaew %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovew %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovew %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovnew %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovnew %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovbew %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovbew %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovaw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovaw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovsw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovnsw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovpw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovpw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovnpw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovnpw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovlw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovlw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovgew %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovgew %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovlew %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovlew %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovgw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovgw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: cmovow (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovnow (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovbw (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovbw (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovbw (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovaew (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovaew (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovaew (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovew (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovew (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovnew (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovnew (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovbew (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovbew (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovaw (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovaw (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovsw (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovnsw (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovpw (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovpw (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovnpw (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovnpw (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovlw (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovlw (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovgew (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovgew (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovlew (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovlew (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovgw (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: cmovgw (%rdx), %di # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cmov_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: cmovow %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovnow %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovbw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovbw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovbw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovaew %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovaew %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovaew %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovew %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovew %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovnew %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovnew %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovbew %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovbew %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovaw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovaw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovsw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovnsw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovpw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovpw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovnpw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovnpw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovlw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovlw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovgew %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovgew %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovlew %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovlew %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovgw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovgw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmovow (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovnow (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovbw (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovbw (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovbw (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovaew (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovaew (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovaew (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovew (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovew (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovnew (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovnew (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovbew (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovbew (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovaw (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovaw (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovsw (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovnsw (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovpw (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovpw (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovnpw (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovnpw (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovlw (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovlw (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovgew (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovgew (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovlew (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovlew (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovgw (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: cmovgw (%rdx), %di # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "cmovow $1, $0 \0A\09 cmovnow $1, $0 \0A\09 cmovbw $1, $0 \0A\09 cmovcw $1, $0 \0A\09 cmovnaew $1, $0 \0A\09 cmovnbw $1, $0 \0A\09 cmovncw $1, $0 \0A\09 cmovaew $1, $0 \0A\09 cmovzw $1, $0 \0A\09 cmovew $1, $0 \0A\09 cmovnzw $1, $0 \0A\09 cmovnew $1, $0 \0A\09 cmovbew $1, $0 \0A\09 cmovnaw $1, $0 \0A\09 cmovnbew $1, $0 \0A\09 cmovaw $1, $0 \0A\09 cmovsw $1, $0 \0A\09 cmovnsw $1, $0 \0A\09 cmovpw $1, $0 \0A\09 cmovpew $1, $0 \0A\09 cmovnpw $1, $0 \0A\09 cmovpow $1, $0 \0A\09 cmovlw $1, $0 \0A\09 cmovngew $1, $0 \0A\09 cmovnlw $1, $0 \0A\09 cmovgew $1, $0 \0A\09 cmovlew $1, $0 \0A\09 cmovngw $1, $0 \0A\09 cmovnlew $1, $0 \0A\09 cmovgw $1, $0 \0A\09 cmovow $2, $0 \0A\09 cmovnow $2, $0 \0A\09 cmovbw $2, $0 \0A\09 cmovcw $2, $0 \0A\09 cmovnaew $2, $0 \0A\09 cmovnbw $2, $0 \0A\09 cmovncw $2, $0 \0A\09 cmovaew $2, $0 \0A\09 cmovzw $2, $0 \0A\09 cmovew $2, $0 \0A\09 cmovnzw $2, $0 \0A\09 cmovnew $2, $0 \0A\09 cmovbew $2, $0 \0A\09 cmovnaw $2, $0 \0A\09 cmovnbew $2, $0 \0A\09 cmovaw $2, $0 \0A\09 cmovsw $2, $0 \0A\09 cmovnsw $2, $0 \0A\09 cmovpw $2, $0 \0A\09 cmovpew $2, $0 \0A\09 cmovnpw $2, $0 \0A\09 cmovpow $2, $0 \0A\09 cmovlw $2, $0 \0A\09 cmovngew $2, $0 \0A\09 cmovnlw $2, $0 \0A\09 cmovgew $2, $0 \0A\09 cmovlew $2, $0 \0A\09 cmovngw $2, $0 \0A\09 cmovnlew $2, $0 \0A\09 cmovgw $2, $0", "r,r,*m"(i16 %a0, i16 %a1, i16 *%a2) - ret void -} - -define void @test_cmov_32(i32 %a0, i32 %a1, i32 *%a2) optsize { -; GENERIC-LABEL: test_cmov_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: cmovol %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovnol %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovbl %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovbl %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovbl %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovael %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovael %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovael %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovel %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovel %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovnel %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovnel %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovbel %esi, %edi # sched: [3:1.00] -; GENERIC-NEXT: cmovbel %esi, %edi # sched: [3:1.00] -; GENERIC-NEXT: cmoval %esi, %edi # sched: [3:1.00] -; GENERIC-NEXT: cmoval %esi, %edi # sched: [3:1.00] -; GENERIC-NEXT: cmovsl %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovnsl %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovpl %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovpl %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovnpl %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovnpl %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovll %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovll %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovgel %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovgel %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovlel %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovlel %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovgl %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovgl %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: cmovol (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovnol (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovael (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovael (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovael (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovel (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovel (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00] -; GENERIC-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00] -; GENERIC-NEXT: cmoval (%rdx), %edi # sched: [8:1.00] -; GENERIC-NEXT: cmoval (%rdx), %edi # sched: [8:1.00] -; GENERIC-NEXT: cmovsl (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovnsl (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovll (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovll (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovgel (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovgel (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmov_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: cmovol %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovnol %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovel %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovel %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovnel %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovnel %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovbel %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovbel %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmoval %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmoval %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovsl %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovnsl %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovpl %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovpl %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovnpl %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovnpl %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovll %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovll %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovgel %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovgel %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovlel %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovlel %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovgl %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovgl %esi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmovol (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovnol (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovbl (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovbl (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovbl (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovael (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovael (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovael (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovel (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovel (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovnel (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovnel (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovbel (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovbel (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmoval (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmoval (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovsl (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovnsl (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovpl (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovpl (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovnpl (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovnpl (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovll (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovll (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovgel (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovgel (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovlel (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovlel (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovgl (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: cmovgl (%rdx), %edi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmov_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: cmovol %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovnol %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovbl %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovbl %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovbl %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovael %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovael %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovael %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovel %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovel %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovnel %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovnel %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovbel %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovbel %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmoval %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmoval %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovsl %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovnsl %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovpl %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovpl %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovnpl %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovnpl %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovll %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovll %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovgel %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovgel %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovlel %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovlel %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovgl %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovgl %esi, %edi # sched: [2:1.00] -; SLM-NEXT: cmovol (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovnol (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovbl (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovbl (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovbl (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovael (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovael (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovael (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovel (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovel (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovnel (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovnel (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovbel (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovbel (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmoval (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmoval (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovsl (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovnsl (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovpl (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovpl (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovnpl (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovnpl (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovll (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovll (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovgel (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovgel (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovlel (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovlel (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovgl (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: cmovgl (%rdx), %edi # sched: [5:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cmov_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: cmovol %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovnol %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovbl %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovbl %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovbl %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovael %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovael %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovael %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovel %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovel %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovnel %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovnel %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovbel %esi, %edi # sched: [3:1.00] -; SANDY-NEXT: cmovbel %esi, %edi # sched: [3:1.00] -; SANDY-NEXT: cmoval %esi, %edi # sched: [3:1.00] -; SANDY-NEXT: cmoval %esi, %edi # sched: [3:1.00] -; SANDY-NEXT: cmovsl %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovnsl %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovpl %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovpl %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovnpl %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovnpl %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovll %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovll %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovgel %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovgel %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovlel %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovlel %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovgl %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovgl %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: cmovol (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovnol (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovael (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovael (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovael (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovel (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovel (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00] -; SANDY-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00] -; SANDY-NEXT: cmoval (%rdx), %edi # sched: [8:1.00] -; SANDY-NEXT: cmoval (%rdx), %edi # sched: [8:1.00] -; SANDY-NEXT: cmovsl (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovnsl (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovll (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovll (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovgel (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovgel (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cmov_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: cmovol %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovnol %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovbl %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovbl %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovbl %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovael %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovael %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovael %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovel %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovel %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovnel %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovnel %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovbel %esi, %edi # sched: [3:0.75] -; HASWELL-NEXT: cmovbel %esi, %edi # sched: [3:0.75] -; HASWELL-NEXT: cmoval %esi, %edi # sched: [3:0.75] -; HASWELL-NEXT: cmoval %esi, %edi # sched: [3:0.75] -; HASWELL-NEXT: cmovsl %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovnsl %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovpl %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovpl %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovnpl %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovnpl %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovll %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovll %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovgel %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovgel %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovlel %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovlel %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovgl %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovgl %esi, %edi # sched: [2:0.50] -; HASWELL-NEXT: cmovol (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovnol (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovbl (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovbl (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovbl (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovael (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovael (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovael (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovel (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovel (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovnel (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovnel (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovbel (%rdx), %edi # sched: [8:0.75] -; HASWELL-NEXT: cmovbel (%rdx), %edi # sched: [8:0.75] -; HASWELL-NEXT: cmoval (%rdx), %edi # sched: [8:0.75] -; HASWELL-NEXT: cmoval (%rdx), %edi # sched: [8:0.75] -; HASWELL-NEXT: cmovsl (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovnsl (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovpl (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovpl (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovll (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovll (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovgel (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovgel (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovlel (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovlel (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovgl (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: cmovgl (%rdx), %edi # sched: [7:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmov_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: cmovol %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovnol %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovel %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovel %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovnel %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovnel %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovbel %esi, %edi # sched: [2:0.50] -; BROADWELL-NEXT: cmovbel %esi, %edi # sched: [2:0.50] -; BROADWELL-NEXT: cmoval %esi, %edi # sched: [2:0.50] -; BROADWELL-NEXT: cmoval %esi, %edi # sched: [2:0.50] -; BROADWELL-NEXT: cmovsl %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovnsl %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovpl %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovpl %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovnpl %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovnpl %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovll %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovll %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovgel %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovgel %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovlel %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovlel %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovgl %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovgl %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: cmovol (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovnol (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovael (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovael (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovael (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovel (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovel (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovnel (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovnel (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovbel (%rdx), %edi # sched: [7:0.50] -; BROADWELL-NEXT: cmovbel (%rdx), %edi # sched: [7:0.50] -; BROADWELL-NEXT: cmoval (%rdx), %edi # sched: [7:0.50] -; BROADWELL-NEXT: cmoval (%rdx), %edi # sched: [7:0.50] -; BROADWELL-NEXT: cmovsl (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovnsl (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovpl (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovpl (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovnpl (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovnpl (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovll (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovll (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovgel (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovgel (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovlel (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovlel (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovgl (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: cmovgl (%rdx), %edi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmov_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: cmovol %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnol %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovel %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovel %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnel %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnel %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovbel %esi, %edi # sched: [2:1.00] -; SKYLAKE-NEXT: cmovbel %esi, %edi # sched: [2:1.00] -; SKYLAKE-NEXT: cmoval %esi, %edi # sched: [2:1.00] -; SKYLAKE-NEXT: cmoval %esi, %edi # sched: [2:1.00] -; SKYLAKE-NEXT: cmovsl %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnsl %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovpl %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovpl %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnpl %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnpl %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovll %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovll %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovgel %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovgel %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovlel %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovlel %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovgl %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovgl %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovol (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnol (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovael (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovael (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovael (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovel (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovel (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnel (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnel (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovbel (%rdx), %edi # sched: [7:1.00] -; SKYLAKE-NEXT: cmovbel (%rdx), %edi # sched: [7:1.00] -; SKYLAKE-NEXT: cmoval (%rdx), %edi # sched: [7:1.00] -; SKYLAKE-NEXT: cmoval (%rdx), %edi # sched: [7:1.00] -; SKYLAKE-NEXT: cmovsl (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnsl (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovpl (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovpl (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnpl (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnpl (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovll (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovll (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovgel (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovgel (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovlel (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovlel (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovgl (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovgl (%rdx), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmov_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: cmovol %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovnol %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovel %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovel %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovnel %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovnel %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovbel %esi, %edi # sched: [2:1.00] -; SKX-NEXT: cmovbel %esi, %edi # sched: [2:1.00] -; SKX-NEXT: cmoval %esi, %edi # sched: [2:1.00] -; SKX-NEXT: cmoval %esi, %edi # sched: [2:1.00] -; SKX-NEXT: cmovsl %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovnsl %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovpl %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovpl %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovnpl %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovnpl %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovll %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovll %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovgel %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovgel %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovlel %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovlel %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovgl %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovgl %esi, %edi # sched: [1:0.50] -; SKX-NEXT: cmovol (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovnol (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovael (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovael (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovael (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovel (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovel (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovnel (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovnel (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovbel (%rdx), %edi # sched: [7:1.00] -; SKX-NEXT: cmovbel (%rdx), %edi # sched: [7:1.00] -; SKX-NEXT: cmoval (%rdx), %edi # sched: [7:1.00] -; SKX-NEXT: cmoval (%rdx), %edi # sched: [7:1.00] -; SKX-NEXT: cmovsl (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovnsl (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovpl (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovpl (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovnpl (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovnpl (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovll (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovll (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovgel (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovgel (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovlel (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovlel (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovgl (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: cmovgl (%rdx), %edi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cmov_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmovol %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovnol %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovel %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovel %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovnel %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovnel %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovbel %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovbel %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmoval %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmoval %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovsl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovnsl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovpl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovpl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovnpl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovnpl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovll %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovll %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovgel %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovgel %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovlel %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovlel %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovgl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovgl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmovol (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovnol (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovel (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovel (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovnel (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovnel (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovbel (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovbel (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmoval (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmoval (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovsl (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovnsl (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovpl (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovpl (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovnpl (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovnpl (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovll (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovll (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovgel (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovgel (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovlel (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovlel (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovgl (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: cmovgl (%rdx), %edi # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cmov_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: cmovol %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovnol %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovbl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovael %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovel %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovel %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovnel %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovnel %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovbel %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovbel %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmoval %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmoval %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovsl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovnsl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovpl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovpl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovnpl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovnpl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovll %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovll %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovgel %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovgel %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovlel %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovlel %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovgl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovgl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmovol (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovnol (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovbl (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovbl (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovbl (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovael (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovael (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovael (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovel (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovel (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovnel (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovnel (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovbel (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovbel (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmoval (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmoval (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovsl (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovnsl (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovpl (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovpl (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovnpl (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovnpl (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovll (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovll (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovgel (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovgel (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovlel (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovlel (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovgl (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: cmovgl (%rdx), %edi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cmov_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: cmovol %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovnol %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovbl %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovbl %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovbl %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovael %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovael %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovael %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovel %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovel %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovnel %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovnel %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovbel %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovbel %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmoval %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmoval %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovsl %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovnsl %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovpl %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovpl %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovnpl %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovnpl %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovll %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovll %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovgel %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovgel %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovlel %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovlel %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovgl %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovgl %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmovol (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovnol (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovbl (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovbl (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovbl (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovael (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovael (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovael (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovel (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovel (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovnel (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovnel (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovbel (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovbel (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmoval (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmoval (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovsl (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovnsl (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovpl (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovpl (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovnpl (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovnpl (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovll (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovll (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovgel (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovgel (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovlel (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovlel (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovgl (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: cmovgl (%rdx), %edi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "cmovol $1, $0 \0A\09 cmovnol $1, $0 \0A\09 cmovbl $1, $0 \0A\09 cmovcl $1, $0 \0A\09 cmovnael $1, $0 \0A\09 cmovnbl $1, $0 \0A\09 cmovncl $1, $0 \0A\09 cmovael $1, $0 \0A\09 cmovzl $1, $0 \0A\09 cmovel $1, $0 \0A\09 cmovnzl $1, $0 \0A\09 cmovnel $1, $0 \0A\09 cmovbel $1, $0 \0A\09 cmovnal $1, $0 \0A\09 cmovnbel $1, $0 \0A\09 cmoval $1, $0 \0A\09 cmovsl $1, $0 \0A\09 cmovnsl $1, $0 \0A\09 cmovpl $1, $0 \0A\09 cmovpel $1, $0 \0A\09 cmovnpl $1, $0 \0A\09 cmovpol $1, $0 \0A\09 cmovll $1, $0 \0A\09 cmovngel $1, $0 \0A\09 cmovnll $1, $0 \0A\09 cmovgel $1, $0 \0A\09 cmovlel $1, $0 \0A\09 cmovngl $1, $0 \0A\09 cmovnlel $1, $0 \0A\09 cmovgl $1, $0 \0A\09 cmovol $2, $0 \0A\09 cmovnol $2, $0 \0A\09 cmovbl $2, $0 \0A\09 cmovcl $2, $0 \0A\09 cmovnael $2, $0 \0A\09 cmovnbl $2, $0 \0A\09 cmovncl $2, $0 \0A\09 cmovael $2, $0 \0A\09 cmovzl $2, $0 \0A\09 cmovel $2, $0 \0A\09 cmovnzl $2, $0 \0A\09 cmovnel $2, $0 \0A\09 cmovbel $2, $0 \0A\09 cmovnal $2, $0 \0A\09 cmovnbel $2, $0 \0A\09 cmoval $2, $0 \0A\09 cmovsl $2, $0 \0A\09 cmovnsl $2, $0 \0A\09 cmovpl $2, $0 \0A\09 cmovpel $2, $0 \0A\09 cmovnpl $2, $0 \0A\09 cmovpol $2, $0 \0A\09 cmovll $2, $0 \0A\09 cmovngel $2, $0 \0A\09 cmovnll $2, $0 \0A\09 cmovgel $2, $0 \0A\09 cmovlel $2, $0 \0A\09 cmovngl $2, $0 \0A\09 cmovnlel $2, $0 \0A\09 cmovgl $2, $0", "r,r,*m"(i32 %a0, i32 %a1, i32 *%a2) - ret void -} - -define void @test_cmov_64(i64 %a0, i64 %a1, i64 *%a2) optsize { -; GENERIC-LABEL: test_cmov_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: cmovoq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovnoq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00] -; GENERIC-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00] -; GENERIC-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00] -; GENERIC-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00] -; GENERIC-NEXT: cmovsq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovnsq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: cmovoq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovnoq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovbeq (%rdx), %rdi # sched: [8:1.00] -; GENERIC-NEXT: cmovbeq (%rdx), %rdi # sched: [8:1.00] -; GENERIC-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00] -; GENERIC-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00] -; GENERIC-NEXT: cmovsq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovnsq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmov_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: cmovoq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovnoq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovaq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovaq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovsq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovnsq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmovoq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovnoq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovbq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovbq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovbq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovaeq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovaeq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovaeq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmoveq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmoveq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovneq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovneq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovbeq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovbeq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovaq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovaq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovsq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovnsq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovpq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovpq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovnpq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovnpq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovlq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovlq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovgeq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovgeq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovleq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovleq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovgq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: cmovgq (%rdx), %rdi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmov_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: cmovoq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovnoq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovbq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovbq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovbq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovaeq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovaeq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovaeq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmoveq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmoveq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovneq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovneq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovbeq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovbeq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovaq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovaq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovsq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovnsq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovpq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovpq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovnpq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovnpq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovlq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovlq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovgeq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovgeq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovleq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovleq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovgq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovgq %rsi, %rdi # sched: [2:1.00] -; SLM-NEXT: cmovoq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovnoq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovbq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovbq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovbq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovaeq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovaeq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovaeq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmoveq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmoveq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovneq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovneq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovbeq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovbeq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovaq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovaq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovsq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovnsq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovpq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovpq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovnpq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovnpq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovlq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovlq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovgeq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovgeq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovleq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovleq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovgq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: cmovgq (%rdx), %rdi # sched: [5:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cmov_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: cmovoq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovnoq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00] -; SANDY-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00] -; SANDY-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00] -; SANDY-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00] -; SANDY-NEXT: cmovsq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovnsq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: cmovoq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovnoq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovbeq (%rdx), %rdi # sched: [8:1.00] -; SANDY-NEXT: cmovbeq (%rdx), %rdi # sched: [8:1.00] -; SANDY-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00] -; SANDY-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00] -; SANDY-NEXT: cmovsq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovnsq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cmov_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: cmovoq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovnoq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovbq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovbq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovbq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmoveq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmoveq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovneq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovneq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovbeq %rsi, %rdi # sched: [3:0.75] -; HASWELL-NEXT: cmovbeq %rsi, %rdi # sched: [3:0.75] -; HASWELL-NEXT: cmovaq %rsi, %rdi # sched: [3:0.75] -; HASWELL-NEXT: cmovaq %rsi, %rdi # sched: [3:0.75] -; HASWELL-NEXT: cmovsq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovnsq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovpq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovpq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovlq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovlq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovleq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovleq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovgq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovgq %rsi, %rdi # sched: [2:0.50] -; HASWELL-NEXT: cmovoq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovnoq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovbeq (%rdx), %rdi # sched: [8:0.75] -; HASWELL-NEXT: cmovbeq (%rdx), %rdi # sched: [8:0.75] -; HASWELL-NEXT: cmovaq (%rdx), %rdi # sched: [8:0.75] -; HASWELL-NEXT: cmovaq (%rdx), %rdi # sched: [8:0.75] -; HASWELL-NEXT: cmovsq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovnsq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmov_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: cmovoq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovnoq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovbeq %rsi, %rdi # sched: [2:0.50] -; BROADWELL-NEXT: cmovbeq %rsi, %rdi # sched: [2:0.50] -; BROADWELL-NEXT: cmovaq %rsi, %rdi # sched: [2:0.50] -; BROADWELL-NEXT: cmovaq %rsi, %rdi # sched: [2:0.50] -; BROADWELL-NEXT: cmovsq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovnsq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: cmovoq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovnoq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmoveq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmoveq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovneq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovneq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovbeq (%rdx), %rdi # sched: [7:0.50] -; BROADWELL-NEXT: cmovbeq (%rdx), %rdi # sched: [7:0.50] -; BROADWELL-NEXT: cmovaq (%rdx), %rdi # sched: [7:0.50] -; BROADWELL-NEXT: cmovaq (%rdx), %rdi # sched: [7:0.50] -; BROADWELL-NEXT: cmovsq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovnsq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovpq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovpq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovnpq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovnpq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovlq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovlq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovgeq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovgeq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovleq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovleq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovgq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: cmovgq (%rdx), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmov_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: cmovoq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnoq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovbeq %rsi, %rdi # sched: [2:1.00] -; SKYLAKE-NEXT: cmovbeq %rsi, %rdi # sched: [2:1.00] -; SKYLAKE-NEXT: cmovaq %rsi, %rdi # sched: [2:1.00] -; SKYLAKE-NEXT: cmovaq %rsi, %rdi # sched: [2:1.00] -; SKYLAKE-NEXT: cmovsq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnsq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: cmovoq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnoq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmoveq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmoveq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovneq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovneq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovbeq (%rdx), %rdi # sched: [7:1.00] -; SKYLAKE-NEXT: cmovbeq (%rdx), %rdi # sched: [7:1.00] -; SKYLAKE-NEXT: cmovaq (%rdx), %rdi # sched: [7:1.00] -; SKYLAKE-NEXT: cmovaq (%rdx), %rdi # sched: [7:1.00] -; SKYLAKE-NEXT: cmovsq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnsq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovpq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovpq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnpq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovnpq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovlq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovlq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovgeq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovgeq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovleq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovleq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovgq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: cmovgq (%rdx), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmov_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: cmovoq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovnoq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovbeq %rsi, %rdi # sched: [2:1.00] -; SKX-NEXT: cmovbeq %rsi, %rdi # sched: [2:1.00] -; SKX-NEXT: cmovaq %rsi, %rdi # sched: [2:1.00] -; SKX-NEXT: cmovaq %rsi, %rdi # sched: [2:1.00] -; SKX-NEXT: cmovsq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovnsq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: cmovoq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovnoq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmoveq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmoveq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovneq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovneq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovbeq (%rdx), %rdi # sched: [7:1.00] -; SKX-NEXT: cmovbeq (%rdx), %rdi # sched: [7:1.00] -; SKX-NEXT: cmovaq (%rdx), %rdi # sched: [7:1.00] -; SKX-NEXT: cmovaq (%rdx), %rdi # sched: [7:1.00] -; SKX-NEXT: cmovsq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovnsq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovpq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovpq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovnpq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovnpq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovlq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovlq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovgeq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovgeq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovleq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovleq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovgq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: cmovgq (%rdx), %rdi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cmov_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmovoq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovnoq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovaq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovaq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovsq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovnsq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmovoq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovnoq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmoveq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmoveq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovneq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovneq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovaq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovaq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovsq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovnsq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovpq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovpq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovlq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovlq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovleq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovleq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovgq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: cmovgq (%rdx), %rdi # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cmov_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: cmovoq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovnoq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovaq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovaq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovsq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovnsq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmovoq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovnoq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovbq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovbq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovbq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmoveq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmoveq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovneq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovneq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovaq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovaq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovsq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovnsq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovpq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovpq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovlq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovlq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovleq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovleq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovgq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: cmovgq (%rdx), %rdi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cmov_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: cmovoq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovnoq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovbq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovbq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovbq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmoveq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmoveq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovneq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovneq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovaq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovaq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovsq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovnsq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovpq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovpq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovlq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovlq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovleq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovleq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovgq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovgq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmovoq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovnoq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovbq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovbq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovbq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovaeq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovaeq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovaeq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmoveq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmoveq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovneq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovneq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovbeq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovbeq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovaq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovaq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovsq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovnsq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovpq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovpq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovnpq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovnpq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovlq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovlq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovgeq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovgeq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovleq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovleq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovgq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: cmovgq (%rdx), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "cmovoq $1, $0 \0A\09 cmovnoq $1, $0 \0A\09 cmovbq $1, $0 \0A\09 cmovcq $1, $0 \0A\09 cmovnaeq $1, $0 \0A\09 cmovnbq $1, $0 \0A\09 cmovncq $1, $0 \0A\09 cmovaeq $1, $0 \0A\09 cmovzq $1, $0 \0A\09 cmoveq $1, $0 \0A\09 cmovnzq $1, $0 \0A\09 cmovneq $1, $0 \0A\09 cmovbeq $1, $0 \0A\09 cmovnaq $1, $0 \0A\09 cmovnbeq $1, $0 \0A\09 cmovaq $1, $0 \0A\09 cmovsq $1, $0 \0A\09 cmovnsq $1, $0 \0A\09 cmovpq $1, $0 \0A\09 cmovpeq $1, $0 \0A\09 cmovnpq $1, $0 \0A\09 cmovpoq $1, $0 \0A\09 cmovlq $1, $0 \0A\09 cmovngeq $1, $0 \0A\09 cmovnlq $1, $0 \0A\09 cmovgeq $1, $0 \0A\09 cmovleq $1, $0 \0A\09 cmovngq $1, $0 \0A\09 cmovnleq $1, $0 \0A\09 cmovgq $1, $0 \0A\09 cmovoq $2, $0 \0A\09 cmovnoq $2, $0 \0A\09 cmovbq $2, $0 \0A\09 cmovcq $2, $0 \0A\09 cmovnaeq $2, $0 \0A\09 cmovnbq $2, $0 \0A\09 cmovncq $2, $0 \0A\09 cmovaeq $2, $0 \0A\09 cmovzq $2, $0 \0A\09 cmoveq $2, $0 \0A\09 cmovnzq $2, $0 \0A\09 cmovneq $2, $0 \0A\09 cmovbeq $2, $0 \0A\09 cmovnaq $2, $0 \0A\09 cmovnbeq $2, $0 \0A\09 cmovaq $2, $0 \0A\09 cmovsq $2, $0 \0A\09 cmovnsq $2, $0 \0A\09 cmovpq $2, $0 \0A\09 cmovpeq $2, $0 \0A\09 cmovnpq $2, $0 \0A\09 cmovpoq $2, $0 \0A\09 cmovlq $2, $0 \0A\09 cmovngeq $2, $0 \0A\09 cmovnlq $2, $0 \0A\09 cmovgeq $2, $0 \0A\09 cmovleq $2, $0 \0A\09 cmovngq $2, $0 \0A\09 cmovnleq $2, $0 \0A\09 cmovgq $2, $0", "r,r,*m"(i64 %a0, i64 %a1, i64 *%a2) - ret void -} Index: test/CodeGen/X86/f16c-schedule.ll =================================================================== --- test/CodeGen/X86/f16c-schedule.ll +++ test/CodeGen/X86/f16c-schedule.ll @@ -1,255 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+f16c | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=IVY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) { -; GENERIC-LABEL: test_vcvtph2ps_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00] -; GENERIC-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; IVY-LABEL: test_vcvtph2ps_128: -; IVY: # %bb.0: -; IVY-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00] -; IVY-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00] -; IVY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_vcvtph2ps_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [6:1.00] -; HASWELL-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vcvtph2ps_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [6:1.00] -; BROADWELL-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vcvtph2ps_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [9:0.50] -; SKYLAKE-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_vcvtph2ps_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [13:1.00] -; BDVER2-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_vcvtph2ps_128: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_vcvtph2ps_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [100:0.25] -; ZNVER1-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <8 x i16>, <8 x i16> *%a1 - %2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1) - %3 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0) - %4 = fadd <4 x float> %2, %3 - ret <4 x float> %4 -} -declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) - -define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) { -; GENERIC-LABEL: test_vcvtph2ps_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00] -; GENERIC-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; IVY-LABEL: test_vcvtph2ps_256: -; IVY: # %bb.0: -; IVY-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00] -; IVY-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00] -; IVY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_vcvtph2ps_256: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00] -; HASWELL-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00] -; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vcvtph2ps_256: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [6:1.00] -; BROADWELL-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00] -; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vcvtph2ps_256: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [10:0.50] -; SKYLAKE-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [7:1.00] -; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_vcvtph2ps_256: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [13:2.00] -; BDVER2-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [8:2.00] -; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_vcvtph2ps_256: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:2.00] -; BTVER2-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_vcvtph2ps_256: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [100:0.25] -; ZNVER1-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [100:0.25] -; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <8 x i16>, <8 x i16> *%a1 - %2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1) - %3 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0) - %4 = fadd <8 x float> %2, %3 - ret <8 x float> %4 -} -declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) - -define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16> *%a2) { -; GENERIC-LABEL: test_vcvtps2ph_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; IVY-LABEL: test_vcvtps2ph_128: -; IVY: # %bb.0: -; IVY-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00] -; IVY-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_vcvtps2ph_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vcvtps2ph_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vcvtps2ph_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [6:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_vcvtps2ph_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_vcvtps2ph_128: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_vcvtps2ph_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0) - %2 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a1, i32 0) - %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> - store <4 x i16> %3, <4 x i16> *%a2 - ret <8 x i16> %1 -} -declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32) - -define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_vcvtps2ph_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00] -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; IVY-LABEL: test_vcvtps2ph_256: -; IVY: # %bb.0: -; IVY-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00] -; IVY-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00] -; IVY-NEXT: vzeroupper # sched: [1:1.00] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_vcvtps2ph_256: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00] -; HASWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00] -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vcvtps2ph_256: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00] -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vcvtps2ph_256: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00] -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_vcvtps2ph_256: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [8:2.00] -; BDVER2-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:2.00] -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_vcvtps2ph_256: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:2.00] -; BTVER2-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_vcvtps2ph_256: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) - %2 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a1, i32 0) - store <8 x i16> %2, <8 x i16> *%a2 - ret <8 x i16> %1 -} -declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) Index: test/CodeGen/X86/fma-schedule.ll =================================================================== --- test/CodeGen/X86/fma-schedule.ll +++ test/CodeGen/X86/fma-schedule.ll @@ -1,3317 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -; -; VFMADD -; - -define void @test_vfmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddpd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; GENERIC-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmaddpd_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; BDVER2-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmaddpd_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50] -; HASWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50] -; HASWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmaddpd_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; BROADWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmaddpd_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmaddpd_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; KNL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; KNL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50] -; KNL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50] -; KNL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmaddpd_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmaddpd_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50] -; ZNVER1-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmadd132pd $2, $1, $0 \0A\09 vfmadd213pd $2, $1, $0 \0A\09 vfmadd231pd $2, $1, $0 \0A\09 vfmadd132pd $3, $1, $0 \0A\09 vfmadd213pd $3, $1, $0 \0A\09 vfmadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddpd_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] -; GENERIC-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] -; GENERIC-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50] -; GENERIC-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50] -; GENERIC-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmaddpd_256: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] -; BDVER2-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] -; BDVER2-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:1.00] -; BDVER2-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:1.00] -; BDVER2-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmaddpd_256: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] -; HASWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50] -; HASWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50] -; HASWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmaddpd_256: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] -; BROADWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] -; BROADWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmaddpd_256: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] -; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] -; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmaddpd_256: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] -; KNL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] -; KNL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; KNL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50] -; KNL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50] -; KNL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmaddpd_256: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50] -; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50] -; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50] -; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] -; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmaddpd_256: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50] -; ZNVER1-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmadd132pd $2, $1, $0 \0A\09 vfmadd213pd $2, $1, $0 \0A\09 vfmadd231pd $2, $1, $0 \0A\09 vfmadd132pd $3, $1, $0 \0A\09 vfmadd213pd $3, $1, $0 \0A\09 vfmadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind - ret void -} - -define void @test_vfmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddps_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; GENERIC-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmaddps_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; BDVER2-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmaddps_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50] -; HASWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50] -; HASWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmaddps_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; BROADWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmaddps_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmaddps_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; KNL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50] -; KNL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50] -; KNL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmaddps_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmaddps_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50] -; ZNVER1-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmadd132ps $2, $1, $0 \0A\09 vfmadd213ps $2, $1, $0 \0A\09 vfmadd231ps $2, $1, $0 \0A\09 vfmadd132ps $3, $1, $0 \0A\09 vfmadd213ps $3, $1, $0 \0A\09 vfmadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -define void @test_vfmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddps_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] -; GENERIC-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] -; GENERIC-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50] -; GENERIC-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50] -; GENERIC-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmaddps_256: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] -; BDVER2-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] -; BDVER2-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:1.00] -; BDVER2-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:1.00] -; BDVER2-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmaddps_256: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] -; HASWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50] -; HASWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50] -; HASWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmaddps_256: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] -; BROADWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] -; BROADWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmaddps_256: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] -; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] -; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmaddps_256: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] -; KNL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] -; KNL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50] -; KNL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50] -; KNL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmaddps_256: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50] -; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50] -; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] -; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmaddps_256: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50] -; ZNVER1-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmadd132ps $2, $1, $0 \0A\09 vfmadd213ps $2, $1, $0 \0A\09 vfmadd231ps $2, $1, $0 \0A\09 vfmadd132ps $3, $1, $0 \0A\09 vfmadd213ps $3, $1, $0 \0A\09 vfmadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind - ret void -} - -define void @test_vfmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddsd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; GENERIC-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmaddsd_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; BDVER2-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmaddsd_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; HASWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; HASWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmaddsd_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; BROADWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmaddsd_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] -; SKYLAKE-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] -; SKYLAKE-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmaddsd_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; KNL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; KNL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; KNL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; KNL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmaddsd_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] -; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] -; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmaddsd_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50] -; ZNVER1-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmadd132sd $2, $1, $0 \0A\09 vfmadd213sd $2, $1, $0 \0A\09 vfmadd231sd $2, $1, $0 \0A\09 vfmadd132sd $3, $1, $0 \0A\09 vfmadd213sd $3, $1, $0 \0A\09 vfmadd231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddss_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; GENERIC-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmaddss_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; BDVER2-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmaddss_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; HASWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; HASWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmaddss_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; BROADWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmaddss_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] -; SKYLAKE-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] -; SKYLAKE-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmaddss_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; KNL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] -; KNL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] -; KNL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmaddss_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] -; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] -; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmaddss_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50] -; ZNVER1-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmadd132ss $2, $1, $0 \0A\09 vfmadd213ss $2, $1, $0 \0A\09 vfmadd231ss $2, $1, $0 \0A\09 vfmadd132ss $3, $1, $0 \0A\09 vfmadd213ss $3, $1, $0 \0A\09 vfmadd231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -; -; VFMADDSUB -; - -define void @test_vfmaddsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddsubpd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] -; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmaddsubpd_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] -; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmaddsubpd_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50] -; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50] -; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmaddsubpd_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] -; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmaddsubpd_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] -; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] -; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmaddsubpd_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] -; KNL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] -; KNL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] -; KNL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50] -; KNL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50] -; KNL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmaddsubpd_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50] -; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50] -; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50] -; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] -; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] -; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmaddsubpd_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [12:0.50] -; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmaddsub132pd $2, $1, $0 \0A\09 vfmaddsub213pd $2, $1, $0 \0A\09 vfmaddsub231pd $2, $1, $0 \0A\09 vfmaddsub132pd $3, $1, $0 \0A\09 vfmaddsub213pd $3, $1, $0 \0A\09 vfmaddsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfmaddsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddsubpd_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50] -; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50] -; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmaddsubpd_256: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] -; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] -; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:1.00] -; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:1.00] -; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmaddsubpd_256: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] -; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50] -; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50] -; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmaddsubpd_256: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] -; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] -; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmaddsubpd_256: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] -; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] -; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmaddsubpd_256: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] -; KNL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] -; KNL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; KNL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50] -; KNL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50] -; KNL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmaddsubpd_256: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50] -; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50] -; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50] -; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] -; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] -; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmaddsubpd_256: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50] -; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmaddsub132pd $2, $1, $0 \0A\09 vfmaddsub213pd $2, $1, $0 \0A\09 vfmaddsub231pd $2, $1, $0 \0A\09 vfmaddsub132pd $3, $1, $0 \0A\09 vfmaddsub213pd $3, $1, $0 \0A\09 vfmaddsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind - ret void -} - -define void @test_vfmaddsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddsubps_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] -; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmaddsubps_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] -; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmaddsubps_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50] -; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50] -; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmaddsubps_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] -; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmaddsubps_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] -; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] -; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmaddsubps_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] -; KNL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] -; KNL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] -; KNL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50] -; KNL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50] -; KNL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmaddsubps_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50] -; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50] -; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50] -; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] -; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] -; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmaddsubps_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [12:0.50] -; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmaddsub132ps $2, $1, $0 \0A\09 vfmaddsub213ps $2, $1, $0 \0A\09 vfmaddsub231ps $2, $1, $0 \0A\09 vfmaddsub132ps $3, $1, $0 \0A\09 vfmaddsub213ps $3, $1, $0 \0A\09 vfmaddsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -define void @test_vfmaddsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddsubps_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50] -; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50] -; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmaddsubps_256: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] -; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] -; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:1.00] -; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:1.00] -; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmaddsubps_256: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] -; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50] -; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50] -; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmaddsubps_256: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] -; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] -; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmaddsubps_256: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] -; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] -; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmaddsubps_256: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] -; KNL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] -; KNL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; KNL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50] -; KNL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50] -; KNL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmaddsubps_256: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50] -; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50] -; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50] -; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] -; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] -; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmaddsubps_256: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50] -; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmaddsub132ps $2, $1, $0 \0A\09 vfmaddsub213ps $2, $1, $0 \0A\09 vfmaddsub231ps $2, $1, $0 \0A\09 vfmaddsub132ps $3, $1, $0 \0A\09 vfmaddsub213ps $3, $1, $0 \0A\09 vfmaddsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind - ret void -} - -; -; VFMSUBADD -; - -define void @test_vfmsubaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubaddpd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] -; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmsubaddpd_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] -; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmsubaddpd_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50] -; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50] -; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmsubaddpd_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] -; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmsubaddpd_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] -; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] -; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmsubaddpd_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] -; KNL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] -; KNL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] -; KNL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50] -; KNL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50] -; KNL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmsubaddpd_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50] -; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50] -; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50] -; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] -; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] -; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmsubaddpd_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [12:0.50] -; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmsubadd132pd $2, $1, $0 \0A\09 vfmsubadd213pd $2, $1, $0 \0A\09 vfmsubadd231pd $2, $1, $0 \0A\09 vfmsubadd132pd $3, $1, $0 \0A\09 vfmsubadd213pd $3, $1, $0 \0A\09 vfmsubadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfmsubaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubaddpd_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50] -; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50] -; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmsubaddpd_256: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] -; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] -; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:1.00] -; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:1.00] -; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmsubaddpd_256: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] -; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50] -; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50] -; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmsubaddpd_256: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] -; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] -; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmsubaddpd_256: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] -; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] -; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmsubaddpd_256: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] -; KNL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] -; KNL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; KNL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50] -; KNL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50] -; KNL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmsubaddpd_256: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50] -; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50] -; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50] -; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] -; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] -; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmsubaddpd_256: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50] -; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmsubadd132pd $2, $1, $0 \0A\09 vfmsubadd213pd $2, $1, $0 \0A\09 vfmsubadd231pd $2, $1, $0 \0A\09 vfmsubadd132pd $3, $1, $0 \0A\09 vfmsubadd213pd $3, $1, $0 \0A\09 vfmsubadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind - ret void -} - -define void @test_vfmsubaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubaddps_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] -; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmsubaddps_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] -; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmsubaddps_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50] -; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50] -; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmsubaddps_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] -; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmsubaddps_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] -; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] -; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmsubaddps_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] -; KNL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] -; KNL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] -; KNL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50] -; KNL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50] -; KNL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmsubaddps_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50] -; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50] -; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50] -; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] -; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] -; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmsubaddps_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [12:0.50] -; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmsubadd132ps $2, $1, $0 \0A\09 vfmsubadd213ps $2, $1, $0 \0A\09 vfmsubadd231ps $2, $1, $0 \0A\09 vfmsubadd132ps $3, $1, $0 \0A\09 vfmsubadd213ps $3, $1, $0 \0A\09 vfmsubadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -define void @test_vfmsubaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubaddps_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50] -; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50] -; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmsubaddps_256: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] -; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] -; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:1.00] -; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:1.00] -; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmsubaddps_256: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] -; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50] -; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50] -; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmsubaddps_256: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] -; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] -; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmsubaddps_256: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] -; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] -; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmsubaddps_256: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] -; KNL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] -; KNL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; KNL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50] -; KNL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50] -; KNL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmsubaddps_256: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50] -; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50] -; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50] -; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] -; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] -; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmsubaddps_256: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50] -; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmsubadd132ps $2, $1, $0 \0A\09 vfmsubadd213ps $2, $1, $0 \0A\09 vfmsubadd231ps $2, $1, $0 \0A\09 vfmsubadd132ps $3, $1, $0 \0A\09 vfmsubadd213ps $3, $1, $0 \0A\09 vfmsubadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind - ret void -} - -; -; VFMSUB -; - -define void @test_vfmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubpd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; GENERIC-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmsubpd_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; BDVER2-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmsubpd_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50] -; HASWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50] -; HASWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmsubpd_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; BROADWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmsubpd_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmsubpd_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; KNL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; KNL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; KNL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50] -; KNL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50] -; KNL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmsubpd_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmsubpd_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50] -; ZNVER1-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmsub132pd $2, $1, $0 \0A\09 vfmsub213pd $2, $1, $0 \0A\09 vfmsub231pd $2, $1, $0 \0A\09 vfmsub132pd $3, $1, $0 \0A\09 vfmsub213pd $3, $1, $0 \0A\09 vfmsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubpd_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] -; GENERIC-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] -; GENERIC-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50] -; GENERIC-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50] -; GENERIC-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmsubpd_256: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] -; BDVER2-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] -; BDVER2-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:1.00] -; BDVER2-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:1.00] -; BDVER2-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmsubpd_256: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] -; HASWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] -; HASWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50] -; HASWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50] -; HASWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmsubpd_256: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] -; BROADWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] -; BROADWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmsubpd_256: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] -; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] -; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmsubpd_256: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] -; KNL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] -; KNL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] -; KNL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50] -; KNL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50] -; KNL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmsubpd_256: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50] -; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50] -; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50] -; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] -; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] -; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmsubpd_256: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50] -; ZNVER1-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmsub132pd $2, $1, $0 \0A\09 vfmsub213pd $2, $1, $0 \0A\09 vfmsub231pd $2, $1, $0 \0A\09 vfmsub132pd $3, $1, $0 \0A\09 vfmsub213pd $3, $1, $0 \0A\09 vfmsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind - ret void -} - -define void @test_vfmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubps_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; GENERIC-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmsubps_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; BDVER2-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmsubps_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50] -; HASWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50] -; HASWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmsubps_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; BROADWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmsubps_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmsubps_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; KNL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; KNL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; KNL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50] -; KNL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50] -; KNL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmsubps_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmsubps_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50] -; ZNVER1-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmsub132ps $2, $1, $0 \0A\09 vfmsub213ps $2, $1, $0 \0A\09 vfmsub231ps $2, $1, $0 \0A\09 vfmsub132ps $3, $1, $0 \0A\09 vfmsub213ps $3, $1, $0 \0A\09 vfmsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -define void @test_vfmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubps_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] -; GENERIC-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] -; GENERIC-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50] -; GENERIC-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50] -; GENERIC-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmsubps_256: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] -; BDVER2-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] -; BDVER2-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:1.00] -; BDVER2-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:1.00] -; BDVER2-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmsubps_256: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] -; HASWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] -; HASWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50] -; HASWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50] -; HASWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmsubps_256: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] -; BROADWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] -; BROADWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmsubps_256: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] -; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] -; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmsubps_256: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] -; KNL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] -; KNL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] -; KNL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50] -; KNL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50] -; KNL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmsubps_256: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50] -; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50] -; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50] -; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] -; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] -; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmsubps_256: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50] -; ZNVER1-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmsub132ps $2, $1, $0 \0A\09 vfmsub213ps $2, $1, $0 \0A\09 vfmsub231ps $2, $1, $0 \0A\09 vfmsub132ps $3, $1, $0 \0A\09 vfmsub213ps $3, $1, $0 \0A\09 vfmsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind - ret void -} - -define void @test_vfmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubsd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; GENERIC-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmsubsd_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; BDVER2-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmsubsd_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; HASWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; HASWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmsubsd_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; BROADWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmsubsd_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] -; SKYLAKE-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50] -; SKYLAKE-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmsubsd_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; KNL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; KNL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; KNL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; KNL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; KNL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmsubsd_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] -; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50] -; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmsubsd_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50] -; ZNVER1-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmsub132sd $2, $1, $0 \0A\09 vfmsub213sd $2, $1, $0 \0A\09 vfmsub231sd $2, $1, $0 \0A\09 vfmsub132sd $3, $1, $0 \0A\09 vfmsub213sd $3, $1, $0 \0A\09 vfmsub231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubss_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; GENERIC-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfmsubss_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; BDVER2-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfmsubss_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; HASWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; HASWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfmsubss_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; BROADWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfmsubss_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] -; SKYLAKE-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50] -; SKYLAKE-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfmsubss_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; KNL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; KNL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; KNL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] -; KNL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] -; KNL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfmsubss_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] -; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50] -; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfmsubss_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50] -; ZNVER1-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfmsub132ss $2, $1, $0 \0A\09 vfmsub213ss $2, $1, $0 \0A\09 vfmsub231ss $2, $1, $0 \0A\09 vfmsub132ss $3, $1, $0 \0A\09 vfmsub213ss $3, $1, $0 \0A\09 vfmsub231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -; -; VFNMADD -; - -define void @test_vfnmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfnmaddpd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; GENERIC-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfnmaddpd_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; BDVER2-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfnmaddpd_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50] -; HASWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50] -; HASWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfnmaddpd_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfnmaddpd_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfnmaddpd_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; KNL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50] -; KNL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50] -; KNL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfnmaddpd_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfnmaddpd_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50] -; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfnmadd132pd $2, $1, $0 \0A\09 vfnmadd213pd $2, $1, $0 \0A\09 vfnmadd231pd $2, $1, $0 \0A\09 vfnmadd132pd $3, $1, $0 \0A\09 vfnmadd213pd $3, $1, $0 \0A\09 vfnmadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfnmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfnmaddpd_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50] -; GENERIC-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50] -; GENERIC-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfnmaddpd_256: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:1.00] -; BDVER2-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:1.00] -; BDVER2-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfnmaddpd_256: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50] -; HASWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50] -; HASWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfnmaddpd_256: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] -; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] -; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfnmaddpd_256: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] -; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] -; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfnmaddpd_256: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; KNL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; KNL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50] -; KNL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50] -; KNL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfnmaddpd_256: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50] -; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50] -; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50] -; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] -; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] -; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfnmaddpd_256: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50] -; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50] -; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfnmadd132pd $2, $1, $0 \0A\09 vfnmadd213pd $2, $1, $0 \0A\09 vfnmadd231pd $2, $1, $0 \0A\09 vfnmadd132pd $3, $1, $0 \0A\09 vfnmadd213pd $3, $1, $0 \0A\09 vfnmadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind - ret void -} - -define void @test_vfnmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfnmaddps_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; GENERIC-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfnmaddps_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; BDVER2-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfnmaddps_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50] -; HASWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfnmaddps_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfnmaddps_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfnmaddps_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; KNL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50] -; KNL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfnmaddps_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfnmaddps_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50] -; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfnmadd132ps $2, $1, $0 \0A\09 vfnmadd213ps $2, $1, $0 \0A\09 vfnmadd231ps $2, $1, $0 \0A\09 vfnmadd132ps $3, $1, $0 \0A\09 vfnmadd213ps $3, $1, $0 \0A\09 vfnmadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -define void @test_vfnmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfnmaddps_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50] -; GENERIC-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50] -; GENERIC-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfnmaddps_256: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:1.00] -; BDVER2-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:1.00] -; BDVER2-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfnmaddps_256: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50] -; HASWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfnmaddps_256: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] -; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] -; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfnmaddps_256: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] -; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] -; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfnmaddps_256: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; KNL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; KNL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50] -; KNL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfnmaddps_256: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50] -; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50] -; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] -; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfnmaddps_256: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50] -; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50] -; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfnmadd132ps $2, $1, $0 \0A\09 vfnmadd213ps $2, $1, $0 \0A\09 vfnmadd231ps $2, $1, $0 \0A\09 vfnmadd132ps $3, $1, $0 \0A\09 vfnmadd213ps $3, $1, $0 \0A\09 vfnmadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind - ret void -} - -define void @test_vfnmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfnmaddsd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; GENERIC-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfnmaddsd_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; BDVER2-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfnmaddsd_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; HASWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; HASWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfnmaddsd_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; BROADWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfnmaddsd_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] -; SKYLAKE-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; SKYLAKE-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfnmaddsd_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; KNL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; KNL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; KNL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfnmaddsd_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] -; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfnmaddsd_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50] -; ZNVER1-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfnmadd132sd $2, $1, $0 \0A\09 vfnmadd213sd $2, $1, $0 \0A\09 vfnmadd231sd $2, $1, $0 \0A\09 vfnmadd132sd $3, $1, $0 \0A\09 vfnmadd213sd $3, $1, $0 \0A\09 vfnmadd231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfnmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfnmaddss_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; GENERIC-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfnmaddss_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; BDVER2-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfnmaddss_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; HASWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfnmaddss_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; BROADWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfnmaddss_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] -; SKYLAKE-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; SKYLAKE-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfnmaddss_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; KNL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] -; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; KNL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfnmaddss_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] -; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfnmaddss_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50] -; ZNVER1-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfnmadd132ss $2, $1, $0 \0A\09 vfnmadd213ss $2, $1, $0 \0A\09 vfnmadd231ss $2, $1, $0 \0A\09 vfnmadd132ss $3, $1, $0 \0A\09 vfnmadd213ss $3, $1, $0 \0A\09 vfnmadd231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -; -; VFNMSUB -; - -define void @test_vfnmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfnmsubpd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; GENERIC-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfnmsubpd_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; BDVER2-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfnmsubpd_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50] -; HASWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50] -; HASWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfnmsubpd_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfnmsubpd_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfnmsubpd_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; KNL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; KNL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; KNL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50] -; KNL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50] -; KNL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfnmsubpd_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfnmsubpd_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50] -; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfnmsub132pd $2, $1, $0 \0A\09 vfnmsub213pd $2, $1, $0 \0A\09 vfnmsub231pd $2, $1, $0 \0A\09 vfnmsub132pd $3, $1, $0 \0A\09 vfnmsub213pd $3, $1, $0 \0A\09 vfnmsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfnmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfnmsubpd_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50] -; GENERIC-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50] -; GENERIC-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfnmsubpd_256: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:1.00] -; BDVER2-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:1.00] -; BDVER2-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfnmsubpd_256: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50] -; HASWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50] -; HASWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfnmsubpd_256: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] -; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] -; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfnmsubpd_256: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] -; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] -; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfnmsubpd_256: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] -; KNL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] -; KNL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; KNL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50] -; KNL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50] -; KNL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfnmsubpd_256: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50] -; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50] -; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50] -; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] -; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] -; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfnmsubpd_256: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50] -; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50] -; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfnmsub132pd $2, $1, $0 \0A\09 vfnmsub213pd $2, $1, $0 \0A\09 vfnmsub231pd $2, $1, $0 \0A\09 vfnmsub132pd $3, $1, $0 \0A\09 vfnmsub213pd $3, $1, $0 \0A\09 vfnmsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind - ret void -} - -define void @test_vfnmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfnmsubps_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; GENERIC-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfnmsubps_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; BDVER2-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfnmsubps_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50] -; HASWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50] -; HASWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfnmsubps_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfnmsubps_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfnmsubps_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; KNL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; KNL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; KNL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50] -; KNL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50] -; KNL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfnmsubps_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfnmsubps_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50] -; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfnmsub132ps $2, $1, $0 \0A\09 vfnmsub213ps $2, $1, $0 \0A\09 vfnmsub231ps $2, $1, $0 \0A\09 vfnmsub132ps $3, $1, $0 \0A\09 vfnmsub213ps $3, $1, $0 \0A\09 vfnmsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -define void @test_vfnmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfnmsubps_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50] -; GENERIC-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50] -; GENERIC-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfnmsubps_256: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:1.00] -; BDVER2-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:1.00] -; BDVER2-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [46:4.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfnmsubps_256: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50] -; HASWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50] -; HASWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vzeroupper # sched: [0:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfnmsubps_256: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] -; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] -; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vzeroupper # sched: [0:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfnmsubps_256: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] -; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] -; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfnmsubps_256: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] -; KNL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] -; KNL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; KNL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50] -; KNL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50] -; KNL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfnmsubps_256: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50] -; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50] -; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50] -; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] -; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] -; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: vzeroupper # sched: [0:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfnmsubps_256: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50] -; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50] -; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfnmsub132ps $2, $1, $0 \0A\09 vfnmsub213ps $2, $1, $0 \0A\09 vfnmsub231ps $2, $1, $0 \0A\09 vfnmsub132ps $3, $1, $0 \0A\09 vfnmsub213ps $3, $1, $0 \0A\09 vfnmsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind - ret void -} - -define void @test_vfnmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfnmsubsd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; GENERIC-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfnmsubsd_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; BDVER2-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfnmsubsd_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; HASWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; HASWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfnmsubsd_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; BROADWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfnmsubsd_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] -; SKYLAKE-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] -; SKYLAKE-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfnmsubsd_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; KNL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; KNL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; KNL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; KNL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; KNL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfnmsubsd_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] -; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] -; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfnmsubsd_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50] -; ZNVER1-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfnmsub132sd $2, $1, $0 \0A\09 vfnmsub213sd $2, $1, $0 \0A\09 vfnmsub231sd $2, $1, $0 \0A\09 vfnmsub132sd $3, $1, $0 \0A\09 vfnmsub213sd $3, $1, $0 \0A\09 vfnmsub231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfnmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfnmsubss_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; GENERIC-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; GENERIC-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; GENERIC-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_vfnmsubss_128: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; BDVER2-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; BDVER2-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; HASWELL-LABEL: test_vfnmsubss_128: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; HASWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; HASWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; HASWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_vfnmsubss_128: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; BROADWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; BROADWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; BROADWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_vfnmsubss_128: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKYLAKE-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] -; SKYLAKE-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] -; SKYLAKE-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_vfnmsubss_128: -; KNL: # %bb.0: -; KNL-NEXT: #APP -; KNL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; KNL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; KNL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; KNL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] -; KNL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] -; KNL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] -; KNL-NEXT: #NO_APP -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_vfnmsubss_128: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] -; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] -; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] -; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] -; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] -; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_vfnmsubss_128: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50] -; ZNVER1-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50] -; ZNVER1-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50] -; ZNVER1-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "vfnmsub132ss $2, $1, $0 \0A\09 vfnmsub213ss $2, $1, $0 \0A\09 vfnmsub231ss $2, $1, $0 \0A\09 vfnmsub132ss $3, $1, $0 \0A\09 vfnmsub213ss $3, $1, $0 \0A\09 vfnmsub231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} Index: test/CodeGen/X86/fma4-schedule.ll =================================================================== --- test/CodeGen/X86/fma4-schedule.ll +++ test/CodeGen/X86/fma4-schedule.ll @@ -1,1058 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma4 | FileCheck %s --check-prefixes=CHECK,GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER12,BDVER1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER12,BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER34,BDVER3 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER34,BDVER4 - -; -; VFMADD -; - -define void @test_vfmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddpd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmaddpd_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmaddpd_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfmaddpd $2, $1, $0, $0 \0A\09 vfmaddpd $3, $1, $0, $0 \0A\09 vfmaddpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddpd_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmaddpd_256: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmaddpd_256: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: vzeroupper -; BDVER34-NEXT: retq - tail call void asm "vfmaddpd $2, $1, $0, $0 \0A\09 vfmaddpd $3, $1, $0, $0 \0A\09 vfmaddpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind - ret void -} - -define void @test_vfmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddps_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmaddps_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmaddps_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfmaddps $2, $1, $0, $0 \0A\09 vfmaddps $3, $1, $0, $0 \0A\09 vfmaddps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -define void @test_vfmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddps_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmaddps_256: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmaddps_256: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: vzeroupper -; BDVER34-NEXT: retq - tail call void asm "vfmaddps $2, $1, $0, $0 \0A\09 vfmaddps $3, $1, $0, $0 \0A\09 vfmaddps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind - ret void -} - -define void @test_vfmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddsd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmaddsd_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmaddsd_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfmaddsd $2, $1, $0, $0 \0A\09 vfmaddsd $3, $1, $0, $0 \0A\09 vfmaddsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddss_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmaddss_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmaddss_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfmaddss $2, $1, $0, $0 \0A\09 vfmaddss $3, $1, $0, $0 \0A\09 vfmaddss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -; -; VFMADDSUB -; - -define void @test_vfmaddsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddsubpd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmaddsubpd_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmaddsubpd_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfmaddsubpd $2, $1, $0, $0 \0A\09 vfmaddsubpd $3, $1, $0, $0 \0A\09 vfmaddsubpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfmaddsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddsubpd_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmaddsubpd_256: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmaddsubpd_256: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: vzeroupper -; BDVER34-NEXT: retq - tail call void asm "vfmaddsubpd $2, $1, $0, $0 \0A\09 vfmaddsubpd $3, $1, $0, $0 \0A\09 vfmaddsubpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind - ret void -} - -define void @test_vfmaddsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddsubps_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmaddsubps_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmaddsubps_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfmaddsubps $2, $1, $0, $0 \0A\09 vfmaddsubps $3, $1, $0, $0 \0A\09 vfmaddsubps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -define void @test_vfmaddsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmaddsubps_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmaddsubps_256: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmaddsubps_256: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: vzeroupper -; BDVER34-NEXT: retq - tail call void asm "vfmaddsubps $2, $1, $0, $0 \0A\09 vfmaddsubps $3, $1, $0, $0 \0A\09 vfmaddsubps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind - ret void -} - -; -; VFMSUBADD -; - -define void @test_vfmsubaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubaddpd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmsubaddpd_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmsubaddpd_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfmsubaddpd $2, $1, $0, $0 \0A\09 vfmsubaddpd $3, $1, $0, $0 \0A\09 vfmsubaddpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfmsubaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubaddpd_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmsubaddpd_256: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmsubaddpd_256: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: vzeroupper -; BDVER34-NEXT: retq - tail call void asm "vfmsubaddpd $2, $1, $0, $0 \0A\09 vfmsubaddpd $3, $1, $0, $0 \0A\09 vfmsubaddpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind - ret void -} - -define void @test_vfmsubaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubaddps_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmsubaddps_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmsubaddps_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfmsubaddps $2, $1, $0, $0 \0A\09 vfmsubaddps $3, $1, $0, $0 \0A\09 vfmsubaddps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -define void @test_vfmsubaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubaddps_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmsubaddps_256: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmsubaddps_256: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: vzeroupper -; BDVER34-NEXT: retq - tail call void asm "vfmsubaddps $2, $1, $0, $0 \0A\09 vfmsubaddps $3, $1, $0, $0 \0A\09 vfmsubaddps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind - ret void -} - -; -; VFMSUB -; - -define void @test_vfmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubpd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmsubpd_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmsubpd_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfmsubpd $2, $1, $0, $0 \0A\09 vfmsubpd $3, $1, $0, $0 \0A\09 vfmsubpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubpd_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmsubpd_256: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmsubpd_256: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: vzeroupper -; BDVER34-NEXT: retq - tail call void asm "vfmsubpd $2, $1, $0, $0 \0A\09 vfmsubpd $3, $1, $0, $0 \0A\09 vfmsubpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind - ret void -} - -define void @test_vfmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubps_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmsubps_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmsubps_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmsubps (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfmsubps $2, $1, $0, $0 \0A\09 vfmsubps $3, $1, $0, $0 \0A\09 vfmsubps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -define void @test_vfmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubps_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmsubps_256: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmsubps_256: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: vzeroupper -; BDVER34-NEXT: retq - tail call void asm "vfmsubps $2, $1, $0, $0 \0A\09 vfmsubps $3, $1, $0, $0 \0A\09 vfmsubps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind - ret void -} - -define void @test_vfmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubsd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmsubsd_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmsubsd_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfmsubsd $2, $1, $0, $0 \0A\09 vfmsubsd $3, $1, $0, $0 \0A\09 vfmsubsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfmsubss_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfmsubss_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfmsubss_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmsubss (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfmsubss %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfmsubss $2, $1, $0, $0 \0A\09 vfmsubss $3, $1, $0, $0 \0A\09 vfmsubss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -; -; VFNMADD -; - -define void @test_vfnmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfnmaddpd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfnmaddpd_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfnmaddpd_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfnmaddpd $2, $1, $0, $0 \0A\09 vfnmaddpd $3, $1, $0, $0 \0A\09 vfnmaddpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfnmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfnmaddpd_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfnmaddpd_256: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfnmaddpd_256: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: vzeroupper -; BDVER34-NEXT: retq - tail call void asm "vfnmaddpd $2, $1, $0, $0 \0A\09 vfnmaddpd $3, $1, $0, $0 \0A\09 vfnmaddpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind - ret void -} - -define void @test_vfnmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfnmaddps_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfnmaddps_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfnmaddps_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfnmaddps $2, $1, $0, $0 \0A\09 vfnmaddps $3, $1, $0, $0 \0A\09 vfnmaddps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -define void @test_vfnmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfnmaddps_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfnmaddps_256: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfnmaddps_256: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: vzeroupper -; BDVER34-NEXT: retq - tail call void asm "vfnmaddps $2, $1, $0, $0 \0A\09 vfnmaddps $3, $1, $0, $0 \0A\09 vfnmaddps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind - ret void -} - -define void @test_vfnmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfnmaddsd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfnmaddsd_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfnmaddsd_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfnmaddsd $2, $1, $0, $0 \0A\09 vfnmaddsd $3, $1, $0, $0 \0A\09 vfnmaddsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfnmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfnmaddss_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfnmaddss_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfnmaddss_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfnmaddss $2, $1, $0, $0 \0A\09 vfnmaddss $3, $1, $0, $0 \0A\09 vfnmaddss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -; -; VFNMSUB -; - -define void @test_vfnmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfnmsubpd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfnmsubpd_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfnmsubpd_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfnmsubpd $2, $1, $0, $0 \0A\09 vfnmsubpd $3, $1, $0, $0 \0A\09 vfnmsubpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfnmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfnmsubpd_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfnmsubpd_256: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfnmsubpd_256: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: vzeroupper -; BDVER34-NEXT: retq - tail call void asm "vfnmsubpd $2, $1, $0, $0 \0A\09 vfnmsubpd $3, $1, $0, $0 \0A\09 vfnmsubpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind - ret void -} - -define void @test_vfnmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfnmsubps_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfnmsubps_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfnmsubps_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfnmsubps $2, $1, $0, $0 \0A\09 vfnmsubps $3, $1, $0, $0 \0A\09 vfnmsubps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} - -define void @test_vfnmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfnmsubps_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfnmsubps_256: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfnmsubps_256: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER34-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: vzeroupper -; BDVER34-NEXT: retq - tail call void asm "vfnmsubps $2, $1, $0, $0 \0A\09 vfnmsubps $3, $1, $0, $0 \0A\09 vfnmsubps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind - ret void -} - -define void @test_vfnmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize { -; GENERIC-LABEL: test_vfnmsubsd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfnmsubsd_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfnmsubsd_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfnmsubsd $2, $1, $0, $0 \0A\09 vfnmsubsd $3, $1, $0, $0 \0A\09 vfnmsubsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind - ret void -} - -define void @test_vfnmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize { -; GENERIC-LABEL: test_vfnmsubss_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; GENERIC-NEXT: vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfnmsubss_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER34-LABEL: test_vfnmsubss_128: -; BDVER34: # %bb.0: -; BDVER34-NEXT: #APP -; BDVER34-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER34-NEXT: vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0 -; BDVER34-NEXT: #NO_APP -; BDVER34-NEXT: retq - tail call void asm "vfnmsubss $2, $1, $0, $0 \0A\09 vfnmsubss $3, $1, $0, $0 \0A\09 vfnmsubss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind - ret void -} Index: test/CodeGen/X86/fsgsbase-schedule.ll =================================================================== --- test/CodeGen/X86/fsgsbase-schedule.ll +++ test/CodeGen/X86/fsgsbase-schedule.ll @@ -1,411 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=fsgsbase | FileCheck %s --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=GLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=IVY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=KNL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1 - -define i32 @test_x86_rdfsbase_32() { -; GENERIC-LABEL: test_x86_rdfsbase_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rdfsbasel %eax # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GLM-LABEL: test_x86_rdfsbase_32: -; GLM: # %bb.0: -; GLM-NEXT: rdfsbasel %eax # sched: [100:1.00] -; GLM-NEXT: retq # sched: [4:1.00] -; -; IVY-LABEL: test_x86_rdfsbase_32: -; IVY: # %bb.0: -; IVY-NEXT: rdfsbasel %eax # sched: [100:0.33] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_x86_rdfsbase_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: rdfsbasel %eax # sched: [100:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_x86_rdfsbase_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: rdfsbasel %eax # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_x86_rdfsbase_32: -; SKX: # %bb.0: -; SKX-NEXT: rdfsbasel %eax # sched: [100:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_x86_rdfsbase_32: -; KNL: # %bb.0: -; KNL-NEXT: rdfsbasel %eax # sched: [100:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; BDVER-LABEL: test_x86_rdfsbase_32: -; BDVER: # %bb.0: -; BDVER-NEXT: rdfsbasel %eax -; BDVER-NEXT: retq -; -; ZNVER1-LABEL: test_x86_rdfsbase_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: rdfsbasel %eax # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %res = call i32 @llvm.x86.rdfsbase.32() - ret i32 %res -} -declare i32 @llvm.x86.rdfsbase.32() nounwind readnone - -define i32 @test_x86_rdgsbase_32() { -; GENERIC-LABEL: test_x86_rdgsbase_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rdgsbasel %eax # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GLM-LABEL: test_x86_rdgsbase_32: -; GLM: # %bb.0: -; GLM-NEXT: rdgsbasel %eax # sched: [100:1.00] -; GLM-NEXT: retq # sched: [4:1.00] -; -; IVY-LABEL: test_x86_rdgsbase_32: -; IVY: # %bb.0: -; IVY-NEXT: rdgsbasel %eax # sched: [100:0.33] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_x86_rdgsbase_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: rdgsbasel %eax # sched: [100:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_x86_rdgsbase_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: rdgsbasel %eax # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_x86_rdgsbase_32: -; SKX: # %bb.0: -; SKX-NEXT: rdgsbasel %eax # sched: [100:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_x86_rdgsbase_32: -; KNL: # %bb.0: -; KNL-NEXT: rdgsbasel %eax # sched: [100:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; BDVER-LABEL: test_x86_rdgsbase_32: -; BDVER: # %bb.0: -; BDVER-NEXT: rdgsbasel %eax -; BDVER-NEXT: retq -; -; ZNVER1-LABEL: test_x86_rdgsbase_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: rdgsbasel %eax # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %res = call i32 @llvm.x86.rdgsbase.32() - ret i32 %res -} -declare i32 @llvm.x86.rdgsbase.32() nounwind readnone - -define i64 @test_x86_rdfsbase_64() { -; GENERIC-LABEL: test_x86_rdfsbase_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rdfsbaseq %rax # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GLM-LABEL: test_x86_rdfsbase_64: -; GLM: # %bb.0: -; GLM-NEXT: rdfsbaseq %rax # sched: [100:1.00] -; GLM-NEXT: retq # sched: [4:1.00] -; -; IVY-LABEL: test_x86_rdfsbase_64: -; IVY: # %bb.0: -; IVY-NEXT: rdfsbaseq %rax # sched: [100:0.33] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_x86_rdfsbase_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: rdfsbaseq %rax # sched: [100:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_x86_rdfsbase_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: rdfsbaseq %rax # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_x86_rdfsbase_64: -; SKX: # %bb.0: -; SKX-NEXT: rdfsbaseq %rax # sched: [100:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_x86_rdfsbase_64: -; KNL: # %bb.0: -; KNL-NEXT: rdfsbaseq %rax # sched: [100:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; BDVER-LABEL: test_x86_rdfsbase_64: -; BDVER: # %bb.0: -; BDVER-NEXT: rdfsbaseq %rax -; BDVER-NEXT: retq -; -; ZNVER1-LABEL: test_x86_rdfsbase_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: rdfsbaseq %rax # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %res = call i64 @llvm.x86.rdfsbase.64() - ret i64 %res -} -declare i64 @llvm.x86.rdfsbase.64() nounwind readnone - -define i64 @test_x86_rdgsbase_64() { -; GENERIC-LABEL: test_x86_rdgsbase_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rdgsbaseq %rax # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GLM-LABEL: test_x86_rdgsbase_64: -; GLM: # %bb.0: -; GLM-NEXT: rdgsbaseq %rax # sched: [100:1.00] -; GLM-NEXT: retq # sched: [4:1.00] -; -; IVY-LABEL: test_x86_rdgsbase_64: -; IVY: # %bb.0: -; IVY-NEXT: rdgsbaseq %rax # sched: [100:0.33] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_x86_rdgsbase_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: rdgsbaseq %rax # sched: [100:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_x86_rdgsbase_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: rdgsbaseq %rax # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_x86_rdgsbase_64: -; SKX: # %bb.0: -; SKX-NEXT: rdgsbaseq %rax # sched: [100:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_x86_rdgsbase_64: -; KNL: # %bb.0: -; KNL-NEXT: rdgsbaseq %rax # sched: [100:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; BDVER-LABEL: test_x86_rdgsbase_64: -; BDVER: # %bb.0: -; BDVER-NEXT: rdgsbaseq %rax -; BDVER-NEXT: retq -; -; ZNVER1-LABEL: test_x86_rdgsbase_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: rdgsbaseq %rax # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %res = call i64 @llvm.x86.rdgsbase.64() - ret i64 %res -} -declare i64 @llvm.x86.rdgsbase.64() nounwind readnone - -define void @test_x86_wrfsbase_32(i32 %x) { -; GENERIC-LABEL: test_x86_wrfsbase_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: wrfsbasel %edi # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GLM-LABEL: test_x86_wrfsbase_32: -; GLM: # %bb.0: -; GLM-NEXT: wrfsbasel %edi # sched: [100:1.00] -; GLM-NEXT: retq # sched: [4:1.00] -; -; IVY-LABEL: test_x86_wrfsbase_32: -; IVY: # %bb.0: -; IVY-NEXT: wrfsbasel %edi # sched: [100:0.33] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_x86_wrfsbase_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: wrfsbasel %edi # sched: [100:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_x86_wrfsbase_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: wrfsbasel %edi # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_x86_wrfsbase_32: -; SKX: # %bb.0: -; SKX-NEXT: wrfsbasel %edi # sched: [100:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_x86_wrfsbase_32: -; KNL: # %bb.0: -; KNL-NEXT: wrfsbasel %edi # sched: [100:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; BDVER-LABEL: test_x86_wrfsbase_32: -; BDVER: # %bb.0: -; BDVER-NEXT: wrfsbasel %edi -; BDVER-NEXT: retq -; -; ZNVER1-LABEL: test_x86_wrfsbase_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: wrfsbasel %edi # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.wrfsbase.32(i32 %x) - ret void -} -declare void @llvm.x86.wrfsbase.32(i32) nounwind readnone - -define void @test_x86_wrgsbase_32(i32 %x) { -; GENERIC-LABEL: test_x86_wrgsbase_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: wrgsbasel %edi # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GLM-LABEL: test_x86_wrgsbase_32: -; GLM: # %bb.0: -; GLM-NEXT: wrgsbasel %edi # sched: [100:1.00] -; GLM-NEXT: retq # sched: [4:1.00] -; -; IVY-LABEL: test_x86_wrgsbase_32: -; IVY: # %bb.0: -; IVY-NEXT: wrgsbasel %edi # sched: [100:0.33] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_x86_wrgsbase_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: wrgsbasel %edi # sched: [100:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_x86_wrgsbase_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: wrgsbasel %edi # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_x86_wrgsbase_32: -; SKX: # %bb.0: -; SKX-NEXT: wrgsbasel %edi # sched: [100:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_x86_wrgsbase_32: -; KNL: # %bb.0: -; KNL-NEXT: wrgsbasel %edi # sched: [100:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; BDVER-LABEL: test_x86_wrgsbase_32: -; BDVER: # %bb.0: -; BDVER-NEXT: wrgsbasel %edi -; BDVER-NEXT: retq -; -; ZNVER1-LABEL: test_x86_wrgsbase_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: wrgsbasel %edi # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.wrgsbase.32(i32 %x) - ret void -} -declare void @llvm.x86.wrgsbase.32(i32) nounwind readnone - -define void @test_x86_wrfsbase_64(i64 %x) { -; GENERIC-LABEL: test_x86_wrfsbase_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: wrfsbaseq %rdi # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GLM-LABEL: test_x86_wrfsbase_64: -; GLM: # %bb.0: -; GLM-NEXT: wrfsbaseq %rdi # sched: [100:1.00] -; GLM-NEXT: retq # sched: [4:1.00] -; -; IVY-LABEL: test_x86_wrfsbase_64: -; IVY: # %bb.0: -; IVY-NEXT: wrfsbaseq %rdi # sched: [100:0.33] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_x86_wrfsbase_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: wrfsbaseq %rdi # sched: [100:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_x86_wrfsbase_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: wrfsbaseq %rdi # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_x86_wrfsbase_64: -; SKX: # %bb.0: -; SKX-NEXT: wrfsbaseq %rdi # sched: [100:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_x86_wrfsbase_64: -; KNL: # %bb.0: -; KNL-NEXT: wrfsbaseq %rdi # sched: [100:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; BDVER-LABEL: test_x86_wrfsbase_64: -; BDVER: # %bb.0: -; BDVER-NEXT: wrfsbaseq %rdi -; BDVER-NEXT: retq -; -; ZNVER1-LABEL: test_x86_wrfsbase_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: wrfsbaseq %rdi # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.wrfsbase.64(i64 %x) - ret void -} -declare void @llvm.x86.wrfsbase.64(i64) nounwind readnone - -define void @test_x86_wrgsbase_64(i64 %x) { -; GENERIC-LABEL: test_x86_wrgsbase_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: wrgsbaseq %rdi # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GLM-LABEL: test_x86_wrgsbase_64: -; GLM: # %bb.0: -; GLM-NEXT: wrgsbaseq %rdi # sched: [100:1.00] -; GLM-NEXT: retq # sched: [4:1.00] -; -; IVY-LABEL: test_x86_wrgsbase_64: -; IVY: # %bb.0: -; IVY-NEXT: wrgsbaseq %rdi # sched: [100:0.33] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_x86_wrgsbase_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: wrgsbaseq %rdi # sched: [100:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_x86_wrgsbase_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: wrgsbaseq %rdi # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_x86_wrgsbase_64: -; SKX: # %bb.0: -; SKX-NEXT: wrgsbaseq %rdi # sched: [100:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: test_x86_wrgsbase_64: -; KNL: # %bb.0: -; KNL-NEXT: wrgsbaseq %rdi # sched: [100:0.25] -; KNL-NEXT: retq # sched: [7:1.00] -; -; BDVER-LABEL: test_x86_wrgsbase_64: -; BDVER: # %bb.0: -; BDVER-NEXT: wrgsbaseq %rdi -; BDVER-NEXT: retq -; -; ZNVER1-LABEL: test_x86_wrgsbase_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: wrgsbaseq %rdi # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.wrgsbase.64(i64 %x) - ret void -} -declare void @llvm.x86.wrgsbase.64(i64) nounwind readnone Index: test/CodeGen/X86/lea32-schedule.ll =================================================================== --- test/CodeGen/X86/lea32-schedule.ll +++ test/CodeGen/X86/lea32-schedule.ll @@ -1,898 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define i32 @test_lea_offset(i32) { -; GENERIC-LABEL: test_lea_offset: -; GENERIC: # %bb.0: -; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi -; GENERIC-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_offset: -; ATOM: # %bb.0: -; ATOM-NEXT: # kill: def $edi killed $edi def $rdi -; ATOM-NEXT: leal -24(%rdi), %eax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_offset: -; SLM: # %bb.0: -; SLM-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NEXT: leal -24(%rdi), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_offset: -; SANDY: # %bb.0: -; SANDY-NEXT: # kill: def $edi killed $edi def $rdi -; SANDY-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_offset: -; HASWELL: # %bb.0: -; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi -; HASWELL-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_offset: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi -; BROADWELL-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_offset: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi -; SKYLAKE-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_offset: -; BDVER2: # %bb.0: -; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BDVER2-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_offset: -; BTVER2: # %bb.0: -; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BTVER2-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_offset: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi -; ZNVER1-NEXT: leal -24(%rdi), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %2 = add nsw i32 %0, -24 - ret i32 %2 -} - -define i32 @test_lea_offset_big(i32) { -; GENERIC-LABEL: test_lea_offset_big: -; GENERIC: # %bb.0: -; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi -; GENERIC-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_offset_big: -; ATOM: # %bb.0: -; ATOM-NEXT: # kill: def $edi killed $edi def $rdi -; ATOM-NEXT: leal 1024(%rdi), %eax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_offset_big: -; SLM: # %bb.0: -; SLM-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NEXT: leal 1024(%rdi), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_offset_big: -; SANDY: # %bb.0: -; SANDY-NEXT: # kill: def $edi killed $edi def $rdi -; SANDY-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_offset_big: -; HASWELL: # %bb.0: -; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi -; HASWELL-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_offset_big: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi -; BROADWELL-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_offset_big: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi -; SKYLAKE-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_offset_big: -; BDVER2: # %bb.0: -; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BDVER2-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_offset_big: -; BTVER2: # %bb.0: -; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BTVER2-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_offset_big: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi -; ZNVER1-NEXT: leal 1024(%rdi), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %2 = add nsw i32 %0, 1024 - ret i32 %2 -} - -; Function Attrs: norecurse nounwind readnone uwtable -define i32 @test_lea_add(i32, i32) { -; GENERIC-LABEL: test_lea_add: -; GENERIC: # %bb.0: -; GENERIC-NEXT: # kill: def $esi killed $esi def $rsi -; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi -; GENERIC-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add: -; ATOM: # %bb.0: -; ATOM-NEXT: # kill: def $esi killed $esi def $rsi -; ATOM-NEXT: # kill: def $edi killed $edi def $rdi -; ATOM-NEXT: leal (%rdi,%rsi), %eax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_add: -; SLM: # %bb.0: -; SLM-NEXT: # kill: def $esi killed $esi def $rsi -; SLM-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NEXT: leal (%rdi,%rsi), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add: -; SANDY: # %bb.0: -; SANDY-NEXT: # kill: def $esi killed $esi def $rsi -; SANDY-NEXT: # kill: def $edi killed $edi def $rdi -; SANDY-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add: -; HASWELL: # %bb.0: -; HASWELL-NEXT: # kill: def $esi killed $esi def $rsi -; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi -; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_add: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: # kill: def $esi killed $esi def $rsi -; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi -; BROADWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_add: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: # kill: def $esi killed $esi def $rsi -; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi -; SKYLAKE-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_add: -; BDVER2: # %bb.0: -; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi -; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BDVER2-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_add: -; BTVER2: # %bb.0: -; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi -; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BTVER2-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: # kill: def $esi killed $esi def $rsi -; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi -; ZNVER1-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %3 = add nsw i32 %1, %0 - ret i32 %3 -} - -define i32 @test_lea_add_offset(i32, i32) { -; GENERIC-LABEL: test_lea_add_offset: -; GENERIC: # %bb.0: -; GENERIC-NEXT: # kill: def $esi killed $esi def $rsi -; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi -; GENERIC-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; GENERIC-NEXT: addl $16, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_offset: -; ATOM: # %bb.0: -; ATOM-NEXT: # kill: def $esi killed $esi def $rsi -; ATOM-NEXT: # kill: def $edi killed $edi def $rdi -; ATOM-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_add_offset: -; SLM: # %bb.0: -; SLM-NEXT: # kill: def $esi killed $esi def $rsi -; SLM-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_offset: -; SANDY: # %bb.0: -; SANDY-NEXT: # kill: def $esi killed $esi def $rsi -; SANDY-NEXT: # kill: def $edi killed $edi def $rdi -; SANDY-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; SANDY-NEXT: addl $16, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_offset: -; HASWELL: # %bb.0: -; HASWELL-NEXT: # kill: def $esi killed $esi def $rsi -; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi -; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; HASWELL-NEXT: addl $16, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_add_offset: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: # kill: def $esi killed $esi def $rsi -; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi -; BROADWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; BROADWELL-NEXT: addl $16, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_add_offset: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: # kill: def $esi killed $esi def $rsi -; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi -; SKYLAKE-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: addl $16, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_add_offset: -; BDVER2: # %bb.0: -; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi -; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BDVER2-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_add_offset: -; BTVER2: # %bb.0: -; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi -; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BTVER2-NEXT: leal 16(%rdi,%rsi), %eax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_offset: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: # kill: def $esi killed $esi def $rsi -; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi -; ZNVER1-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %3 = add i32 %0, 16 - %4 = add i32 %3, %1 - ret i32 %4 -} - -define i32 @test_lea_add_offset_big(i32, i32) { -; GENERIC-LABEL: test_lea_add_offset_big: -; GENERIC: # %bb.0: -; GENERIC-NEXT: # kill: def $esi killed $esi def $rsi -; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi -; GENERIC-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; GENERIC-NEXT: addl $-4096, %eax # imm = 0xF000 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_offset_big: -; ATOM: # %bb.0: -; ATOM-NEXT: # kill: def $esi killed $esi def $rsi -; ATOM-NEXT: # kill: def $edi killed $edi def $rdi -; ATOM-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_add_offset_big: -; SLM: # %bb.0: -; SLM-NEXT: # kill: def $esi killed $esi def $rsi -; SLM-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_offset_big: -; SANDY: # %bb.0: -; SANDY-NEXT: # kill: def $esi killed $esi def $rsi -; SANDY-NEXT: # kill: def $edi killed $edi def $rdi -; SANDY-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; SANDY-NEXT: addl $-4096, %eax # imm = 0xF000 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_offset_big: -; HASWELL: # %bb.0: -; HASWELL-NEXT: # kill: def $esi killed $esi def $rsi -; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi -; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; HASWELL-NEXT: addl $-4096, %eax # imm = 0xF000 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_add_offset_big: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: # kill: def $esi killed $esi def $rsi -; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi -; BROADWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; BROADWELL-NEXT: addl $-4096, %eax # imm = 0xF000 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_add_offset_big: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: # kill: def $esi killed $esi def $rsi -; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi -; SKYLAKE-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: addl $-4096, %eax # imm = 0xF000 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_add_offset_big: -; BDVER2: # %bb.0: -; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi -; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BDVER2-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_add_offset_big: -; BTVER2: # %bb.0: -; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi -; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BTVER2-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_offset_big: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: # kill: def $esi killed $esi def $rsi -; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi -; ZNVER1-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %3 = add i32 %0, -4096 - %4 = add i32 %3, %1 - ret i32 %4 -} - -define i32 @test_lea_mul(i32) { -; GENERIC-LABEL: test_lea_mul: -; GENERIC: # %bb.0: -; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi -; GENERIC-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_mul: -; ATOM: # %bb.0: -; ATOM-NEXT: # kill: def $edi killed $edi def $rdi -; ATOM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_mul: -; SLM: # %bb.0: -; SLM-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_mul: -; SANDY: # %bb.0: -; SANDY-NEXT: # kill: def $edi killed $edi def $rdi -; SANDY-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_mul: -; HASWELL: # %bb.0: -; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi -; HASWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_mul: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi -; BROADWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_mul: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi -; SKYLAKE-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_mul: -; BDVER2: # %bb.0: -; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BDVER2-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_mul: -; BTVER2: # %bb.0: -; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BTVER2-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_mul: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi -; ZNVER1-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %2 = mul nsw i32 %0, 3 - ret i32 %2 -} - -define i32 @test_lea_mul_offset(i32) { -; GENERIC-LABEL: test_lea_mul_offset: -; GENERIC: # %bb.0: -; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi -; GENERIC-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; GENERIC-NEXT: addl $-32, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_mul_offset: -; ATOM: # %bb.0: -; ATOM-NEXT: # kill: def $edi killed $edi def $rdi -; ATOM-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_mul_offset: -; SLM: # %bb.0: -; SLM-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_mul_offset: -; SANDY: # %bb.0: -; SANDY-NEXT: # kill: def $edi killed $edi def $rdi -; SANDY-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; SANDY-NEXT: addl $-32, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_mul_offset: -; HASWELL: # %bb.0: -; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi -; HASWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; HASWELL-NEXT: addl $-32, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_mul_offset: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi -; BROADWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; BROADWELL-NEXT: addl $-32, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_mul_offset: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi -; SKYLAKE-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: addl $-32, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_mul_offset: -; BDVER2: # %bb.0: -; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BDVER2-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_mul_offset: -; BTVER2: # %bb.0: -; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BTVER2-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_mul_offset: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi -; ZNVER1-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %2 = mul nsw i32 %0, 3 - %3 = add nsw i32 %2, -32 - ret i32 %3 -} - -define i32 @test_lea_mul_offset_big(i32) { -; GENERIC-LABEL: test_lea_mul_offset_big: -; GENERIC: # %bb.0: -; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi -; GENERIC-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; GENERIC-NEXT: addl $10000, %eax # imm = 0x2710 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_mul_offset_big: -; ATOM: # %bb.0: -; ATOM-NEXT: # kill: def $edi killed $edi def $rdi -; ATOM-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_mul_offset_big: -; SLM: # %bb.0: -; SLM-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_mul_offset_big: -; SANDY: # %bb.0: -; SANDY-NEXT: # kill: def $edi killed $edi def $rdi -; SANDY-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; SANDY-NEXT: addl $10000, %eax # imm = 0x2710 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_mul_offset_big: -; HASWELL: # %bb.0: -; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi -; HASWELL-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; HASWELL-NEXT: addl $10000, %eax # imm = 0x2710 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_mul_offset_big: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi -; BROADWELL-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; BROADWELL-NEXT: addl $10000, %eax # imm = 0x2710 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_mul_offset_big: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi -; SKYLAKE-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: addl $10000, %eax # imm = 0x2710 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_mul_offset_big: -; BDVER2: # %bb.0: -; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BDVER2-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_mul_offset_big: -; BTVER2: # %bb.0: -; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BTVER2-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_mul_offset_big: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi -; ZNVER1-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %2 = mul nsw i32 %0, 9 - %3 = add nsw i32 %2, 10000 - ret i32 %3 -} - -define i32 @test_lea_add_scale(i32, i32) { -; GENERIC-LABEL: test_lea_add_scale: -; GENERIC: # %bb.0: -; GENERIC-NEXT: # kill: def $esi killed $esi def $rsi -; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi -; GENERIC-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_scale: -; ATOM: # %bb.0: -; ATOM-NEXT: # kill: def $esi killed $esi def $rsi -; ATOM-NEXT: # kill: def $edi killed $edi def $rdi -; ATOM-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_add_scale: -; SLM: # %bb.0: -; SLM-NEXT: # kill: def $esi killed $esi def $rsi -; SLM-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_scale: -; SANDY: # %bb.0: -; SANDY-NEXT: # kill: def $esi killed $esi def $rsi -; SANDY-NEXT: # kill: def $edi killed $edi def $rdi -; SANDY-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_scale: -; HASWELL: # %bb.0: -; HASWELL-NEXT: # kill: def $esi killed $esi def $rsi -; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi -; HASWELL-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_add_scale: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: # kill: def $esi killed $esi def $rsi -; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi -; BROADWELL-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_add_scale: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: # kill: def $esi killed $esi def $rsi -; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi -; SKYLAKE-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_add_scale: -; BDVER2: # %bb.0: -; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi -; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BDVER2-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_add_scale: -; BTVER2: # %bb.0: -; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi -; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BTVER2-NEXT: leal (%rdi,%rsi,2), %eax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_scale: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: # kill: def $esi killed $esi def $rsi -; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi -; ZNVER1-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %3 = shl i32 %1, 1 - %4 = add nsw i32 %3, %0 - ret i32 %4 -} - -define i32 @test_lea_add_scale_offset(i32, i32) { -; GENERIC-LABEL: test_lea_add_scale_offset: -; GENERIC: # %bb.0: -; GENERIC-NEXT: # kill: def $esi killed $esi def $rsi -; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi -; GENERIC-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50] -; GENERIC-NEXT: addl $96, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_scale_offset: -; ATOM: # %bb.0: -; ATOM-NEXT: # kill: def $esi killed $esi def $rsi -; ATOM-NEXT: # kill: def $edi killed $edi def $rdi -; ATOM-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_add_scale_offset: -; SLM: # %bb.0: -; SLM-NEXT: # kill: def $esi killed $esi def $rsi -; SLM-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_scale_offset: -; SANDY: # %bb.0: -; SANDY-NEXT: # kill: def $esi killed $esi def $rsi -; SANDY-NEXT: # kill: def $edi killed $edi def $rdi -; SANDY-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50] -; SANDY-NEXT: addl $96, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_scale_offset: -; HASWELL: # %bb.0: -; HASWELL-NEXT: # kill: def $esi killed $esi def $rsi -; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi -; HASWELL-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50] -; HASWELL-NEXT: addl $96, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_add_scale_offset: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: # kill: def $esi killed $esi def $rsi -; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi -; BROADWELL-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50] -; BROADWELL-NEXT: addl $96, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_add_scale_offset: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: # kill: def $esi killed $esi def $rsi -; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi -; SKYLAKE-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: addl $96, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_add_scale_offset: -; BDVER2: # %bb.0: -; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi -; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BDVER2-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_add_scale_offset: -; BTVER2: # %bb.0: -; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi -; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BTVER2-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_scale_offset: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: # kill: def $esi killed $esi def $rsi -; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi -; ZNVER1-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %3 = shl i32 %1, 2 - %4 = add i32 %0, 96 - %5 = add i32 %4, %3 - ret i32 %5 -} - -define i32 @test_lea_add_scale_offset_big(i32, i32) { -; GENERIC-LABEL: test_lea_add_scale_offset_big: -; GENERIC: # %bb.0: -; GENERIC-NEXT: # kill: def $esi killed $esi def $rsi -; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi -; GENERIC-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50] -; GENERIC-NEXT: addl $-1200, %eax # imm = 0xFB50 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_scale_offset_big: -; ATOM: # %bb.0: -; ATOM-NEXT: # kill: def $esi killed $esi def $rsi -; ATOM-NEXT: # kill: def $edi killed $edi def $rdi -; ATOM-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_add_scale_offset_big: -; SLM: # %bb.0: -; SLM-NEXT: # kill: def $esi killed $esi def $rsi -; SLM-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_scale_offset_big: -; SANDY: # %bb.0: -; SANDY-NEXT: # kill: def $esi killed $esi def $rsi -; SANDY-NEXT: # kill: def $edi killed $edi def $rdi -; SANDY-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50] -; SANDY-NEXT: addl $-1200, %eax # imm = 0xFB50 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_scale_offset_big: -; HASWELL: # %bb.0: -; HASWELL-NEXT: # kill: def $esi killed $esi def $rsi -; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi -; HASWELL-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50] -; HASWELL-NEXT: addl $-1200, %eax # imm = 0xFB50 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_add_scale_offset_big: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: # kill: def $esi killed $esi def $rsi -; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi -; BROADWELL-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50] -; BROADWELL-NEXT: addl $-1200, %eax # imm = 0xFB50 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_add_scale_offset_big: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: # kill: def $esi killed $esi def $rsi -; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi -; SKYLAKE-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: addl $-1200, %eax # imm = 0xFB50 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_add_scale_offset_big: -; BDVER2: # %bb.0: -; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi -; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BDVER2-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_add_scale_offset_big: -; BTVER2: # %bb.0: -; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi -; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi -; BTVER2-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_scale_offset_big: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: # kill: def $esi killed $esi def $rsi -; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi -; ZNVER1-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %3 = shl i32 %1, 3 - %4 = add i32 %0, -1200 - %5 = add i32 %4, %3 - ret i32 %5 -} Index: test/CodeGen/X86/lea64-schedule.ll =================================================================== --- test/CodeGen/X86/lea64-schedule.ll +++ test/CodeGen/X86/lea64-schedule.ll @@ -1,728 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define i64 @test_lea_offset(i64) { -; GENERIC-LABEL: test_lea_offset: -; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_offset: -; ATOM: # %bb.0: -; ATOM-NEXT: leaq -24(%rdi), %rax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_offset: -; SLM: # %bb.0: -; SLM-NEXT: leaq -24(%rdi), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_offset: -; SANDY: # %bb.0: -; SANDY-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_offset: -; HASWELL: # %bb.0: -; HASWELL-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_offset: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_offset: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_offset: -; BDVER2: # %bb.0: -; BDVER2-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_offset: -; BTVER2: # %bb.0: -; BTVER2-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_offset: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq -24(%rdi), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %2 = add nsw i64 %0, -24 - ret i64 %2 -} - -define i64 @test_lea_offset_big(i64) { -; GENERIC-LABEL: test_lea_offset_big: -; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_offset_big: -; ATOM: # %bb.0: -; ATOM-NEXT: leaq 1024(%rdi), %rax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_offset_big: -; SLM: # %bb.0: -; SLM-NEXT: leaq 1024(%rdi), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_offset_big: -; SANDY: # %bb.0: -; SANDY-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_offset_big: -; HASWELL: # %bb.0: -; HASWELL-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_offset_big: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_offset_big: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_offset_big: -; BDVER2: # %bb.0: -; BDVER2-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_offset_big: -; BTVER2: # %bb.0: -; BTVER2-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_offset_big: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %2 = add nsw i64 %0, 1024 - ret i64 %2 -} - -; Function Attrs: norecurse nounwind readnone uwtable -define i64 @test_lea_add(i64, i64) { -; GENERIC-LABEL: test_lea_add: -; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add: -; ATOM: # %bb.0: -; ATOM-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_add: -; SLM: # %bb.0: -; SLM-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add: -; SANDY: # %bb.0: -; SANDY-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add: -; HASWELL: # %bb.0: -; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_add: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_add: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_add: -; BDVER2: # %bb.0: -; BDVER2-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_add: -; BTVER2: # %bb.0: -; BTVER2-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %3 = add nsw i64 %1, %0 - ret i64 %3 -} - -define i64 @test_lea_add_offset(i64, i64) { -; GENERIC-LABEL: test_lea_add_offset: -; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; GENERIC-NEXT: addq $16, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_offset: -; ATOM: # %bb.0: -; ATOM-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_add_offset: -; SLM: # %bb.0: -; SLM-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_offset: -; SANDY: # %bb.0: -; SANDY-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; SANDY-NEXT: addq $16, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_offset: -; HASWELL: # %bb.0: -; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; HASWELL-NEXT: addq $16, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_add_offset: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; BROADWELL-NEXT: addq $16, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_add_offset: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: addq $16, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_add_offset: -; BDVER2: # %bb.0: -; BDVER2-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_add_offset: -; BTVER2: # %bb.0: -; BTVER2-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_offset: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %3 = add i64 %0, 16 - %4 = add i64 %3, %1 - ret i64 %4 -} - -define i64 @test_lea_add_offset_big(i64, i64) { -; GENERIC-LABEL: test_lea_add_offset_big: -; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; GENERIC-NEXT: addq $-4096, %rax # imm = 0xF000 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_offset_big: -; ATOM: # %bb.0: -; ATOM-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_add_offset_big: -; SLM: # %bb.0: -; SLM-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_offset_big: -; SANDY: # %bb.0: -; SANDY-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; SANDY-NEXT: addq $-4096, %rax # imm = 0xF000 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_offset_big: -; HASWELL: # %bb.0: -; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; HASWELL-NEXT: addq $-4096, %rax # imm = 0xF000 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_add_offset_big: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; BROADWELL-NEXT: addq $-4096, %rax # imm = 0xF000 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_add_offset_big: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: addq $-4096, %rax # imm = 0xF000 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_add_offset_big: -; BDVER2: # %bb.0: -; BDVER2-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_add_offset_big: -; BTVER2: # %bb.0: -; BTVER2-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_offset_big: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %3 = add i64 %0, -4096 - %4 = add i64 %3, %1 - ret i64 %4 -} - -define i64 @test_lea_mul(i64) { -; GENERIC-LABEL: test_lea_mul: -; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_mul: -; ATOM: # %bb.0: -; ATOM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_mul: -; SLM: # %bb.0: -; SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_mul: -; SANDY: # %bb.0: -; SANDY-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_mul: -; HASWELL: # %bb.0: -; HASWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_mul: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_mul: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_mul: -; BDVER2: # %bb.0: -; BDVER2-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_mul: -; BTVER2: # %bb.0: -; BTVER2-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_mul: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %2 = mul nsw i64 %0, 3 - ret i64 %2 -} - -define i64 @test_lea_mul_offset(i64) { -; GENERIC-LABEL: test_lea_mul_offset: -; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; GENERIC-NEXT: addq $-32, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_mul_offset: -; ATOM: # %bb.0: -; ATOM-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_mul_offset: -; SLM: # %bb.0: -; SLM-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_mul_offset: -; SANDY: # %bb.0: -; SANDY-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; SANDY-NEXT: addq $-32, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_mul_offset: -; HASWELL: # %bb.0: -; HASWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; HASWELL-NEXT: addq $-32, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_mul_offset: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; BROADWELL-NEXT: addq $-32, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_mul_offset: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: addq $-32, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_mul_offset: -; BDVER2: # %bb.0: -; BDVER2-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_mul_offset: -; BTVER2: # %bb.0: -; BTVER2-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_mul_offset: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %2 = mul nsw i64 %0, 3 - %3 = add nsw i64 %2, -32 - ret i64 %3 -} - -define i64 @test_lea_mul_offset_big(i64) { -; GENERIC-LABEL: test_lea_mul_offset_big: -; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; GENERIC-NEXT: addq $10000, %rax # imm = 0x2710 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_mul_offset_big: -; ATOM: # %bb.0: -; ATOM-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_mul_offset_big: -; SLM: # %bb.0: -; SLM-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_mul_offset_big: -; SANDY: # %bb.0: -; SANDY-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; SANDY-NEXT: addq $10000, %rax # imm = 0x2710 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_mul_offset_big: -; HASWELL: # %bb.0: -; HASWELL-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; HASWELL-NEXT: addq $10000, %rax # imm = 0x2710 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_mul_offset_big: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; BROADWELL-NEXT: addq $10000, %rax # imm = 0x2710 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_mul_offset_big: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: addq $10000, %rax # imm = 0x2710 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_mul_offset_big: -; BDVER2: # %bb.0: -; BDVER2-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_mul_offset_big: -; BTVER2: # %bb.0: -; BTVER2-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_mul_offset_big: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %2 = mul nsw i64 %0, 9 - %3 = add nsw i64 %2, 10000 - ret i64 %3 -} - -define i64 @test_lea_add_scale(i64, i64) { -; GENERIC-LABEL: test_lea_add_scale: -; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_scale: -; ATOM: # %bb.0: -; ATOM-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_add_scale: -; SLM: # %bb.0: -; SLM-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_scale: -; SANDY: # %bb.0: -; SANDY-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_scale: -; HASWELL: # %bb.0: -; HASWELL-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_add_scale: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_add_scale: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_add_scale: -; BDVER2: # %bb.0: -; BDVER2-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_add_scale: -; BTVER2: # %bb.0: -; BTVER2-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_scale: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %3 = shl i64 %1, 1 - %4 = add nsw i64 %3, %0 - ret i64 %4 -} - -define i64 @test_lea_add_scale_offset(i64, i64) { -; GENERIC-LABEL: test_lea_add_scale_offset: -; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50] -; GENERIC-NEXT: addq $96, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_scale_offset: -; ATOM: # %bb.0: -; ATOM-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_add_scale_offset: -; SLM: # %bb.0: -; SLM-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_scale_offset: -; SANDY: # %bb.0: -; SANDY-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50] -; SANDY-NEXT: addq $96, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_scale_offset: -; HASWELL: # %bb.0: -; HASWELL-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50] -; HASWELL-NEXT: addq $96, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_add_scale_offset: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50] -; BROADWELL-NEXT: addq $96, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_add_scale_offset: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: addq $96, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_add_scale_offset: -; BDVER2: # %bb.0: -; BDVER2-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_add_scale_offset: -; BTVER2: # %bb.0: -; BTVER2-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_scale_offset: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %3 = shl i64 %1, 2 - %4 = add i64 %0, 96 - %5 = add i64 %4, %3 - ret i64 %5 -} - -define i64 @test_lea_add_scale_offset_big(i64, i64) { -; GENERIC-LABEL: test_lea_add_scale_offset_big: -; GENERIC: # %bb.0: -; GENERIC-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50] -; GENERIC-NEXT: addq $-1200, %rax # imm = 0xFB50 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_scale_offset_big: -; ATOM: # %bb.0: -; ATOM-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lea_add_scale_offset_big: -; SLM: # %bb.0: -; SLM-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_scale_offset_big: -; SANDY: # %bb.0: -; SANDY-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50] -; SANDY-NEXT: addq $-1200, %rax # imm = 0xFB50 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_scale_offset_big: -; HASWELL: # %bb.0: -; HASWELL-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50] -; HASWELL-NEXT: addq $-1200, %rax # imm = 0xFB50 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lea_add_scale_offset_big: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50] -; BROADWELL-NEXT: addq $-1200, %rax # imm = 0xFB50 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lea_add_scale_offset_big: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: addq $-1200, %rax # imm = 0xFB50 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lea_add_scale_offset_big: -; BDVER2: # %bb.0: -; BDVER2-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lea_add_scale_offset_big: -; BTVER2: # %bb.0: -; BTVER2-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_scale_offset_big: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %3 = shl i64 %1, 3 - %4 = add i64 %0, -1200 - %5 = add i64 %4, %3 - ret i64 %5 -} Index: test/CodeGen/X86/lwp-schedule.ll =================================================================== --- test/CodeGen/X86/lwp-schedule.ll +++ test/CodeGen/X86/lwp-schedule.ll @@ -1,299 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=x86-64 -mattr=+lwp | FileCheck %s --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER1 -; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3 -; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4 - -define void @test_llwpcb(i8 *%a0) nounwind { -; GENERIC-LABEL: test_llwpcb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: llwpcb %rdi # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_llwpcb: -; BDVER12: # %bb.0: -; BDVER12-NEXT: llwpcb %rdi # sched: [100:0.50] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_llwpcb: -; BDVER3: # %bb.0: -; BDVER3-NEXT: llwpcb %rdi -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_llwpcb: -; BDVER4: # %bb.0: -; BDVER4-NEXT: llwpcb %rdi -; BDVER4-NEXT: retq - tail call void @llvm.x86.llwpcb(i8 *%a0) - ret void -} - -define i8* @test_slwpcb(i8 *%a0) nounwind { -; GENERIC-LABEL: test_slwpcb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: slwpcb %rax # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_slwpcb: -; BDVER12: # %bb.0: -; BDVER12-NEXT: slwpcb %rax # sched: [100:0.50] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_slwpcb: -; BDVER3: # %bb.0: -; BDVER3-NEXT: slwpcb %rax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_slwpcb: -; BDVER4: # %bb.0: -; BDVER4-NEXT: slwpcb %rax -; BDVER4-NEXT: retq - %1 = tail call i8* @llvm.x86.slwpcb() - ret i8 *%1 -} - -define i8 @test_lwpins32_rri(i32 %a0, i32 %a1) nounwind { -; GENERIC-LABEL: test_lwpins32_rri: -; GENERIC: # %bb.0: -; GENERIC-NEXT: addl %esi, %esi # sched: [1:0.33] -; GENERIC-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF -; GENERIC-NEXT: # sched: [100:0.33] -; GENERIC-NEXT: setb %al # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_lwpins32_rri: -; BDVER12: # %bb.0: -; BDVER12-NEXT: addl %esi, %esi # sched: [1:0.50] -; BDVER12-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF -; BDVER12-NEXT: # sched: [100:0.50] -; BDVER12-NEXT: setb %al # sched: [1:0.50] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_lwpins32_rri: -; BDVER3: # %bb.0: -; BDVER3-NEXT: addl %esi, %esi -; BDVER3-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF -; BDVER3-NEXT: setb %al -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_lwpins32_rri: -; BDVER4: # %bb.0: -; BDVER4-NEXT: addl %esi, %esi -; BDVER4-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF -; BDVER4-NEXT: setb %al -; BDVER4-NEXT: retq - %1 = add i32 %a1, %a1 - %2 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %1, i32 2309737967) - ret i8 %2 -} - -define i8 @test_lwpins32_rmi(i32 %a0, i32 *%p1) nounwind { -; GENERIC-LABEL: test_lwpins32_rmi: -; GENERIC: # %bb.0: -; GENERIC-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210 -; GENERIC-NEXT: # sched: [100:0.33] -; GENERIC-NEXT: setb %al # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_lwpins32_rmi: -; BDVER12: # %bb.0: -; BDVER12-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210 -; BDVER12-NEXT: # sched: [100:0.50] -; BDVER12-NEXT: setb %al # sched: [1:0.50] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_lwpins32_rmi: -; BDVER3: # %bb.0: -; BDVER3-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210 -; BDVER3-NEXT: setb %al -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_lwpins32_rmi: -; BDVER4: # %bb.0: -; BDVER4-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210 -; BDVER4-NEXT: setb %al -; BDVER4-NEXT: retq - %a1 = load i32, i32 *%p1 - %1 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %a1, i32 1985229328) - ret i8 %1 -} - -define i8 @test_lwpins64_rri(i64 %a0, i32 %a1) nounwind { -; GENERIC-LABEL: test_lwpins64_rri: -; GENERIC: # %bb.0: -; GENERIC-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF -; GENERIC-NEXT: # sched: [100:0.33] -; GENERIC-NEXT: setb %al # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_lwpins64_rri: -; BDVER12: # %bb.0: -; BDVER12-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF -; BDVER12-NEXT: # sched: [100:0.50] -; BDVER12-NEXT: setb %al # sched: [1:0.50] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_lwpins64_rri: -; BDVER3: # %bb.0: -; BDVER3-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF -; BDVER3-NEXT: setb %al -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_lwpins64_rri: -; BDVER4: # %bb.0: -; BDVER4-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF -; BDVER4-NEXT: setb %al -; BDVER4-NEXT: retq - %1 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 2309737967) - ret i8 %1 -} - -define i8 @test_lwpins64_rmi(i64 %a0, i32 *%p1) nounwind { -; GENERIC-LABEL: test_lwpins64_rmi: -; GENERIC: # %bb.0: -; GENERIC-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210 -; GENERIC-NEXT: # sched: [100:0.33] -; GENERIC-NEXT: setb %al # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_lwpins64_rmi: -; BDVER12: # %bb.0: -; BDVER12-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210 -; BDVER12-NEXT: # sched: [100:0.50] -; BDVER12-NEXT: setb %al # sched: [1:0.50] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_lwpins64_rmi: -; BDVER3: # %bb.0: -; BDVER3-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210 -; BDVER3-NEXT: setb %al -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_lwpins64_rmi: -; BDVER4: # %bb.0: -; BDVER4-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210 -; BDVER4-NEXT: setb %al -; BDVER4-NEXT: retq - %a1 = load i32, i32 *%p1 - %1 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 1985229328) - ret i8 %1 -} - -define void @test_lwpval32_rri(i32 %a0, i32 %a1) nounwind { -; GENERIC-LABEL: test_lwpval32_rri: -; GENERIC: # %bb.0: -; GENERIC-NEXT: addl %esi, %esi # sched: [1:0.33] -; GENERIC-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98 -; GENERIC-NEXT: # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_lwpval32_rri: -; BDVER12: # %bb.0: -; BDVER12-NEXT: addl %esi, %esi # sched: [1:0.50] -; BDVER12-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98 -; BDVER12-NEXT: # sched: [100:0.50] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_lwpval32_rri: -; BDVER3: # %bb.0: -; BDVER3-NEXT: addl %esi, %esi -; BDVER3-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98 -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_lwpval32_rri: -; BDVER4: # %bb.0: -; BDVER4-NEXT: addl %esi, %esi -; BDVER4-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98 -; BDVER4-NEXT: retq - %1 = add i32 %a1, %a1 - tail call void @llvm.x86.lwpval32(i32 %a0, i32 %1, i32 4275878552) - ret void -} - -define void @test_lwpval32_rmi(i32 %a0, i32 *%p1) nounwind { -; GENERIC-LABEL: test_lwpval32_rmi: -; GENERIC: # %bb.0: -; GENERIC-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678 -; GENERIC-NEXT: # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_lwpval32_rmi: -; BDVER12: # %bb.0: -; BDVER12-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678 -; BDVER12-NEXT: # sched: [100:0.50] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_lwpval32_rmi: -; BDVER3: # %bb.0: -; BDVER3-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678 -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_lwpval32_rmi: -; BDVER4: # %bb.0: -; BDVER4-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678 -; BDVER4-NEXT: retq - %a1 = load i32, i32 *%p1 - tail call void @llvm.x86.lwpval32(i32 %a0, i32 %a1, i32 305419896) - ret void -} - -define void @test_lwpval64_rri(i64 %a0, i32 %a1) nounwind { -; GENERIC-LABEL: test_lwpval64_rri: -; GENERIC: # %bb.0: -; GENERIC-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98 -; GENERIC-NEXT: # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_lwpval64_rri: -; BDVER12: # %bb.0: -; BDVER12-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98 -; BDVER12-NEXT: # sched: [100:0.50] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_lwpval64_rri: -; BDVER3: # %bb.0: -; BDVER3-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98 -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_lwpval64_rri: -; BDVER4: # %bb.0: -; BDVER4-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98 -; BDVER4-NEXT: retq - tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 4275878552) - ret void -} - -define void @test_lwpval64_rmi(i64 %a0, i32 *%p1) nounwind { -; GENERIC-LABEL: test_lwpval64_rmi: -; GENERIC: # %bb.0: -; GENERIC-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678 -; GENERIC-NEXT: # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_lwpval64_rmi: -; BDVER12: # %bb.0: -; BDVER12-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678 -; BDVER12-NEXT: # sched: [100:0.50] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_lwpval64_rmi: -; BDVER3: # %bb.0: -; BDVER3-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678 -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_lwpval64_rmi: -; BDVER4: # %bb.0: -; BDVER4-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678 -; BDVER4-NEXT: retq - %a1 = load i32, i32 *%p1 - tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 305419896) - ret void -} - -declare void @llvm.x86.llwpcb(i8*) nounwind -declare i8* @llvm.x86.slwpcb() nounwind -declare i8 @llvm.x86.lwpins32(i32, i32, i32) nounwind -declare i8 @llvm.x86.lwpins64(i64, i32, i32) nounwind -declare void @llvm.x86.lwpval32(i32, i32, i32) nounwind -declare void @llvm.x86.lwpval64(i64, i32, i32) nounwind Index: test/CodeGen/X86/lzcnt-schedule.ll =================================================================== --- test/CodeGen/X86/lzcnt-schedule.ll +++ test/CodeGen/X86/lzcnt-schedule.ll @@ -1,187 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define i16 @test_ctlz_i16(i16 zeroext %a0, i16 *%a1) { -; GENERIC-LABEL: test_ctlz_i16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: lzcntw (%rsi), %cx # sched: [8:1.00] -; GENERIC-NEXT: lzcntw %di, %ax # sched: [3:1.00] -; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_ctlz_i16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: lzcntw (%rsi), %cx # sched: [8:1.00] -; HASWELL-NEXT: lzcntw %di, %ax # sched: [3:1.00] -; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ctlz_i16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: lzcntw (%rsi), %cx # sched: [8:1.00] -; BROADWELL-NEXT: lzcntw %di, %ax # sched: [3:1.00] -; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_ctlz_i16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: lzcntw (%rsi), %cx # sched: [8:1.00] -; SKYLAKE-NEXT: lzcntw %di, %ax # sched: [3:1.00] -; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_ctlz_i16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: lzcntw (%rsi), %cx # sched: [6:0.50] -; BDVER2-NEXT: lzcntw %di, %ax # sched: [2:0.50] -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_ctlz_i16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: lzcntw (%rsi), %cx # sched: [4:1.00] -; BTVER2-NEXT: lzcntw %di, %ax # sched: [1:0.50] -; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ctlz_i16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: lzcntw (%rsi), %cx # sched: [6:0.50] -; ZNVER1-NEXT: lzcntw %di, %ax # sched: [2:0.25] -; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: # kill: def $ax killed $ax killed $eax -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i16, i16 *%a1 - %2 = tail call i16 @llvm.ctlz.i16( i16 %1, i1 false ) - %3 = tail call i16 @llvm.ctlz.i16( i16 %a0, i1 false ) - %4 = or i16 %2, %3 - ret i16 %4 -} -declare i16 @llvm.ctlz.i16(i16, i1) - -define i32 @test_ctlz_i32(i32 %a0, i32 *%a1) { -; GENERIC-LABEL: test_ctlz_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: lzcntl (%rsi), %ecx # sched: [8:1.00] -; GENERIC-NEXT: lzcntl %edi, %eax # sched: [3:1.00] -; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_ctlz_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: lzcntl (%rsi), %ecx # sched: [8:1.00] -; HASWELL-NEXT: lzcntl %edi, %eax # sched: [3:1.00] -; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ctlz_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: lzcntl (%rsi), %ecx # sched: [8:1.00] -; BROADWELL-NEXT: lzcntl %edi, %eax # sched: [3:1.00] -; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_ctlz_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: lzcntl (%rsi), %ecx # sched: [8:1.00] -; SKYLAKE-NEXT: lzcntl %edi, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_ctlz_i32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: lzcntl (%rsi), %ecx # sched: [6:0.50] -; BDVER2-NEXT: lzcntl %edi, %eax # sched: [2:0.50] -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_ctlz_i32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: lzcntl (%rsi), %ecx # sched: [4:1.00] -; BTVER2-NEXT: lzcntl %edi, %eax # sched: [1:0.50] -; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ctlz_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: lzcntl (%rsi), %ecx # sched: [6:0.50] -; ZNVER1-NEXT: lzcntl %edi, %eax # sched: [2:0.25] -; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a1 - %2 = tail call i32 @llvm.ctlz.i32( i32 %1, i1 false ) - %3 = tail call i32 @llvm.ctlz.i32( i32 %a0, i1 false ) - %4 = or i32 %2, %3 - ret i32 %4 -} -declare i32 @llvm.ctlz.i32(i32, i1) - -define i64 @test_ctlz_i64(i64 %a0, i64 *%a1) { -; GENERIC-LABEL: test_ctlz_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: lzcntq (%rsi), %rcx # sched: [8:1.00] -; GENERIC-NEXT: lzcntq %rdi, %rax # sched: [3:1.00] -; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_ctlz_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: lzcntq (%rsi), %rcx # sched: [8:1.00] -; HASWELL-NEXT: lzcntq %rdi, %rax # sched: [3:1.00] -; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ctlz_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: lzcntq (%rsi), %rcx # sched: [8:1.00] -; BROADWELL-NEXT: lzcntq %rdi, %rax # sched: [3:1.00] -; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_ctlz_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: lzcntq (%rsi), %rcx # sched: [8:1.00] -; SKYLAKE-NEXT: lzcntq %rdi, %rax # sched: [3:1.00] -; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_ctlz_i64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: lzcntq (%rsi), %rcx # sched: [6:0.50] -; BDVER2-NEXT: lzcntq %rdi, %rax # sched: [2:0.50] -; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_ctlz_i64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: lzcntq (%rsi), %rcx # sched: [4:1.00] -; BTVER2-NEXT: lzcntq %rdi, %rax # sched: [1:0.50] -; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ctlz_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: lzcntq (%rsi), %rcx # sched: [6:0.50] -; ZNVER1-NEXT: lzcntq %rdi, %rax # sched: [2:0.25] -; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a1 - %2 = tail call i64 @llvm.ctlz.i64( i64 %1, i1 false ) - %3 = tail call i64 @llvm.ctlz.i64( i64 %a0, i1 false ) - %4 = or i64 %2, %3 - ret i64 %4 -} -declare i64 @llvm.ctlz.i64(i64, i1) Index: test/CodeGen/X86/mmx-schedule.ll =================================================================== --- test/CodeGen/X86/mmx-schedule.ll +++ test/CodeGen/X86/mmx-schedule.ll @@ -1,7559 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define i64 @test_cvtpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { -; GENERIC-LABEL: test_cvtpd2pi: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] -; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtpd2pi: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [8:4.00] -; ATOM-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [7:3.50] -; ATOM-NEXT: por %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtpd2pi: -; SLM: # %bb.0: -; SLM-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [7:1.00] -; SLM-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [4:0.50] -; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; SLM-NEXT: movq %mm1, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cvtpd2pi: -; SANDY: # %bb.0: -; SANDY-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] -; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cvtpd2pi: -; HASWELL: # %bb.0: -; HASWELL-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00] -; HASWELL-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] -; HASWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtpd2pi: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [4:1.00] -; BROADWELL-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [9:1.00] -; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] -; BROADWELL-NEXT: movq %mm1, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtpd2pi: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [5:1.00] -; SKYLAKE-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [11:1.00] -; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; SKYLAKE-NEXT: movq %mm1, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtpd2pi: -; SKX: # %bb.0: -; SKX-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [5:1.00] -; SKX-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [11:1.00] -; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cvtpd2pi: -; BDVER2: # %bb.0: -; BDVER2-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [13:1.00] -; BDVER2-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [6:1.00] -; BDVER2-NEXT: por %mm0, %mm1 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm1, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cvtpd2pi: -; BTVER2: # %bb.0: -; BTVER2-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [8:1.00] -; BTVER2-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [3:1.00] -; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cvtpd2pi: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [12:1.00] -; ZNVER1-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [4:1.00] -; ZNVER1-NEXT: por %mm0, %mm1 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm1, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0) - %2 = load <2 x double>, <2 x double> *%a1, align 16 - %3 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %2) - %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3) - %5 = bitcast x86_mmx %4 to i64 - ret i64 %5 -} -declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone - -define <2 x double> @test_cvtpi2pd(x86_mmx %a0, x86_mmx* %a1) optsize { -; GENERIC-LABEL: test_cvtpi2pd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [4:1.00] -; GENERIC-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [10:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtpi2pd: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [8:4.00] -; ATOM-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [7:3.50] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtpi2pd: -; SLM: # %bb.0: -; SLM-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cvtpi2pd: -; SANDY: # %bb.0: -; SANDY-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cvtpi2pd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [9:1.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtpi2pd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [9:1.00] -; BROADWELL-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [4:1.00] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtpi2pd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtpi2pd: -; SKX: # %bb.0: -; SKX-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [9:0.50] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cvtpi2pd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [13:1.00] -; BDVER2-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [6:1.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cvtpi2pd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cvtpi2pd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0) - %2 = load x86_mmx, x86_mmx *%a1, align 8 - %3 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %2) - %4 = fadd <2 x double> %1, %3 - ret <2 x double> %4 -} -declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone - -define <4 x float> @test_cvtpi2ps(x86_mmx %a0, x86_mmx* %a1, <4 x float> %a2, <4 x float> %a3) optsize { -; GENERIC-LABEL: test_cvtpi2ps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtpi2ps: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [5:5.00] -; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtpi2ps: -; SLM: # %bb.0: -; SLM-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [7:1.00] -; SLM-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [4:0.50] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cvtpi2ps: -; SANDY: # %bb.0: -; SANDY-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cvtpi2ps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtpi2ps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtpi2ps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [6:2.00] -; SKYLAKE-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtpi2ps: -; SKX: # %bb.0: -; SKX-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [6:2.00] -; SKX-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cvtpi2ps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cvtpi2ps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cvtpi2ps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a2, x86_mmx %a0) - %2 = load x86_mmx, x86_mmx *%a1, align 8 - %3 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a3, x86_mmx %2) - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} -declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone - -define i64 @test_cvtps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { -; GENERIC-LABEL: test_cvtps2pi: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00] -; GENERIC-NEXT: por %mm0, %mm1 # sched: [1:0.33] -; GENERIC-NEXT: movq %mm1, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtps2pi: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtps2pi %xmm0, %mm0 # sched: [5:5.00] -; ATOM-NEXT: cvtps2pi (%rdi), %mm1 # sched: [5:5.00] -; ATOM-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; ATOM-NEXT: movq %mm1, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtps2pi: -; SLM: # %bb.0: -; SLM-NEXT: cvtps2pi (%rdi), %mm1 # sched: [7:1.00] -; SLM-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:0.50] -; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; SLM-NEXT: movq %mm1, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cvtps2pi: -; SANDY: # %bb.0: -; SANDY-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00] -; SANDY-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00] -; SANDY-NEXT: por %mm0, %mm1 # sched: [1:0.33] -; SANDY-NEXT: movq %mm1, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cvtps2pi: -; HASWELL: # %bb.0: -; HASWELL-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00] -; HASWELL-NEXT: cvtps2pi (%rdi), %mm1 # sched: [8:1.00] -; HASWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] -; HASWELL-NEXT: movq %mm1, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtps2pi: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00] -; BROADWELL-NEXT: cvtps2pi (%rdi), %mm1 # sched: [8:1.00] -; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] -; BROADWELL-NEXT: movq %mm1, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtps2pi: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: cvtps2pi %xmm0, %mm0 # sched: [5:1.00] -; SKYLAKE-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:0.50] -; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; SKYLAKE-NEXT: movq %mm1, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtps2pi: -; SKX: # %bb.0: -; SKX-NEXT: cvtps2pi %xmm0, %mm0 # sched: [5:1.00] -; SKX-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:0.50] -; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cvtps2pi: -; BDVER2: # %bb.0: -; BDVER2-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00] -; BDVER2-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00] -; BDVER2-NEXT: por %mm0, %mm1 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm1, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cvtps2pi: -; BTVER2: # %bb.0: -; BTVER2-NEXT: cvtps2pi (%rdi), %mm1 # sched: [8:1.00] -; BTVER2-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00] -; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cvtps2pi: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: cvtps2pi (%rdi), %mm1 # sched: [12:1.00] -; ZNVER1-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00] -; ZNVER1-NEXT: por %mm0, %mm1 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm1, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0) - %2 = load <4 x float>, <4 x float> *%a1, align 16 - %3 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %2) - %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3) - %5 = bitcast x86_mmx %4 to i64 - ret i64 %5 -} -declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone - -define i64 @test_cvttpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { -; GENERIC-LABEL: test_cvttpd2pi: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] -; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvttpd2pi: -; ATOM: # %bb.0: -; ATOM-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [8:4.00] -; ATOM-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [7:3.50] -; ATOM-NEXT: por %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvttpd2pi: -; SLM: # %bb.0: -; SLM-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [7:1.00] -; SLM-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [4:0.50] -; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; SLM-NEXT: movq %mm1, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cvttpd2pi: -; SANDY: # %bb.0: -; SANDY-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] -; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cvttpd2pi: -; HASWELL: # %bb.0: -; HASWELL-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00] -; HASWELL-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] -; HASWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvttpd2pi: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [4:1.00] -; BROADWELL-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [9:1.00] -; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] -; BROADWELL-NEXT: movq %mm1, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvttpd2pi: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [5:1.00] -; SKYLAKE-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [11:1.00] -; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; SKYLAKE-NEXT: movq %mm1, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvttpd2pi: -; SKX: # %bb.0: -; SKX-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [5:1.00] -; SKX-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [11:1.00] -; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cvttpd2pi: -; BDVER2: # %bb.0: -; BDVER2-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [13:1.00] -; BDVER2-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [6:1.00] -; BDVER2-NEXT: por %mm0, %mm1 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm1, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cvttpd2pi: -; BTVER2: # %bb.0: -; BTVER2-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [8:1.00] -; BTVER2-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [3:1.00] -; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cvttpd2pi: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [12:1.00] -; ZNVER1-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [4:1.00] -; ZNVER1-NEXT: por %mm0, %mm1 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm1, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0) - %2 = load <2 x double>, <2 x double> *%a1, align 16 - %3 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %2) - %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3) - %5 = bitcast x86_mmx %4 to i64 - ret i64 %5 -} -declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone - -define i64 @test_cvttps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { -; GENERIC-LABEL: test_cvttps2pi: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00] -; GENERIC-NEXT: por %mm0, %mm1 # sched: [1:0.33] -; GENERIC-NEXT: movq %mm1, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvttps2pi: -; ATOM: # %bb.0: -; ATOM-NEXT: cvttps2pi %xmm0, %mm0 # sched: [5:5.00] -; ATOM-NEXT: cvttps2pi (%rdi), %mm1 # sched: [5:5.00] -; ATOM-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; ATOM-NEXT: movq %mm1, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvttps2pi: -; SLM: # %bb.0: -; SLM-NEXT: cvttps2pi (%rdi), %mm1 # sched: [7:1.00] -; SLM-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:0.50] -; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; SLM-NEXT: movq %mm1, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cvttps2pi: -; SANDY: # %bb.0: -; SANDY-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00] -; SANDY-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00] -; SANDY-NEXT: por %mm0, %mm1 # sched: [1:0.33] -; SANDY-NEXT: movq %mm1, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cvttps2pi: -; HASWELL: # %bb.0: -; HASWELL-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00] -; HASWELL-NEXT: cvttps2pi (%rdi), %mm1 # sched: [8:1.00] -; HASWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] -; HASWELL-NEXT: movq %mm1, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvttps2pi: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00] -; BROADWELL-NEXT: cvttps2pi (%rdi), %mm1 # sched: [8:1.00] -; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] -; BROADWELL-NEXT: movq %mm1, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvttps2pi: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: cvttps2pi %xmm0, %mm0 # sched: [5:1.00] -; SKYLAKE-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:0.50] -; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; SKYLAKE-NEXT: movq %mm1, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvttps2pi: -; SKX: # %bb.0: -; SKX-NEXT: cvttps2pi %xmm0, %mm0 # sched: [5:1.00] -; SKX-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:0.50] -; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cvttps2pi: -; BDVER2: # %bb.0: -; BDVER2-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00] -; BDVER2-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00] -; BDVER2-NEXT: por %mm0, %mm1 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm1, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cvttps2pi: -; BTVER2: # %bb.0: -; BTVER2-NEXT: cvttps2pi (%rdi), %mm1 # sched: [8:1.00] -; BTVER2-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00] -; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cvttps2pi: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: cvttps2pi (%rdi), %mm1 # sched: [12:1.00] -; ZNVER1-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00] -; ZNVER1-NEXT: por %mm0, %mm1 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm1, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0) - %2 = load <4 x float>, <4 x float> *%a1, align 16 - %3 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %2) - %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3) - %5 = bitcast x86_mmx %4 to i64 - ret i64 %5 -} -declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone - -define void @test_emms() optsize { -; GENERIC-LABEL: test_emms: -; GENERIC: # %bb.0: -; GENERIC-NEXT: emms # sched: [31:10.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_emms: -; ATOM: # %bb.0: -; ATOM-NEXT: emms # sched: [5:2.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_emms: -; SLM: # %bb.0: -; SLM-NEXT: emms # sched: [10:5.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_emms: -; SANDY: # %bb.0: -; SANDY-NEXT: emms # sched: [31:10.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_emms: -; HASWELL: # %bb.0: -; HASWELL-NEXT: emms # sched: [31:10.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_emms: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: emms # sched: [31:10.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_emms: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: emms # sched: [10:4.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_emms: -; SKX: # %bb.0: -; SKX-NEXT: emms # sched: [10:4.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_emms: -; BDVER2: # %bb.0: -; BDVER2-NEXT: emms # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_emms: -; BTVER2: # %bb.0: -; BTVER2-NEXT: emms # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_emms: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: emms # sched: [2:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.mmx.emms() - ret void -} -declare void @llvm.x86.mmx.emms() - -define void @test_maskmovq(x86_mmx %a0, x86_mmx %a1, i8* %a2) optsize { -; GENERIC-LABEL: test_maskmovq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_maskmovq: -; ATOM: # %bb.0: -; ATOM-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_maskmovq: -; SLM: # %bb.0: -; SLM-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_maskmovq: -; SANDY: # %bb.0: -; SANDY-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_maskmovq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_maskmovq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_maskmovq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_maskmovq: -; SKX: # %bb.0: -; SKX-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_maskmovq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: maskmovq %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_maskmovq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: maskmovq %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_maskmovq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: maskmovq %mm1, %mm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.mmx.maskmovq(x86_mmx %a0, x86_mmx %a1, i8* %a2) - ret void -} -declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind - -define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) { -; GENERIC-LABEL: test_movd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movd %edi, %mm1 # sched: [1:1.00] -; GENERIC-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] -; GENERIC-NEXT: paddd %mm1, %mm2 # sched: [3:1.00] -; GENERIC-NEXT: paddd %mm2, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: movd %mm2, %ecx # sched: [2:1.00] -; GENERIC-NEXT: movd %mm0, %eax # sched: [2:1.00] -; GENERIC-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movd: -; ATOM: # %bb.0: -; ATOM-NEXT: movd %edi, %mm1 # sched: [1:1.00] -; ATOM-NEXT: movd (%rsi), %mm2 # sched: [1:1.00] -; ATOM-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] -; ATOM-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movd %mm2, %ecx # sched: [3:3.00] -; ATOM-NEXT: movd %mm0, %eax # sched: [3:3.00] -; ATOM-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movd: -; SLM: # %bb.0: -; SLM-NEXT: movd (%rsi), %mm2 # sched: [3:1.00] -; SLM-NEXT: movd %edi, %mm1 # sched: [1:0.50] -; SLM-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] -; SLM-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] -; SLM-NEXT: movd %mm2, %ecx # sched: [1:0.50] -; SLM-NEXT: movd %mm0, %eax # sched: [1:0.50] -; SLM-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_movd: -; SANDY: # %bb.0: -; SANDY-NEXT: movd %edi, %mm1 # sched: [1:1.00] -; SANDY-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] -; SANDY-NEXT: paddd %mm1, %mm2 # sched: [3:1.00] -; SANDY-NEXT: paddd %mm2, %mm0 # sched: [3:1.00] -; SANDY-NEXT: movd %mm2, %ecx # sched: [2:1.00] -; SANDY-NEXT: movd %mm0, %eax # sched: [2:1.00] -; SANDY-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movd %edi, %mm1 # sched: [1:1.00] -; HASWELL-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] -; HASWELL-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] -; HASWELL-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: movd %mm2, %ecx # sched: [1:1.00] -; HASWELL-NEXT: movd %mm0, %eax # sched: [1:1.00] -; HASWELL-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movd %edi, %mm1 # sched: [1:1.00] -; BROADWELL-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] -; BROADWELL-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] -; BROADWELL-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: movd %mm2, %ecx # sched: [1:1.00] -; BROADWELL-NEXT: movd %mm0, %eax # sched: [1:1.00] -; BROADWELL-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movd %edi, %mm1 # sched: [1:1.00] -; SKYLAKE-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] -; SKYLAKE-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] -; SKYLAKE-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movd %mm2, %ecx # sched: [2:1.00] -; SKYLAKE-NEXT: movd %mm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movd: -; SKX: # %bb.0: -; SKX-NEXT: movd %edi, %mm1 # sched: [1:1.00] -; SKX-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] -; SKX-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] -; SKX-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] -; SKX-NEXT: movd %mm2, %ecx # sched: [2:1.00] -; SKX-NEXT: movd %mm0, %eax # sched: [2:1.00] -; SKX-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movd %edi, %mm1 # sched: [10:0.50] -; BDVER2-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] -; BDVER2-NEXT: paddd %mm1, %mm2 # sched: [2:0.50] -; BDVER2-NEXT: paddd %mm2, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: movd %mm2, %ecx # sched: [10:1.00] -; BDVER2-NEXT: movd %mm0, %eax # sched: [10:1.00] -; BDVER2-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movd %edi, %mm1 # sched: [8:0.50] -; BTVER2-NEXT: movd (%rsi), %mm2 # sched: [5:1.00] -; BTVER2-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] -; BTVER2-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movd %mm2, %ecx # sched: [4:1.00] -; BTVER2-NEXT: movd %mm0, %eax # sched: [4:1.00] -; BTVER2-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movd (%rsi), %mm2 # sched: [8:0.50] -; ZNVER1-NEXT: movd %edi, %mm1 # sched: [3:1.00] -; ZNVER1-NEXT: paddd %mm1, %mm2 # sched: [1:0.25] -; ZNVER1-NEXT: paddd %mm2, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: movd %mm2, %ecx # sched: [2:1.00] -; ZNVER1-NEXT: movd %mm0, %eax # sched: [2:1.00] -; ZNVER1-NEXT: movl %ecx, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <2 x i32> undef, i32 %a1, i32 0 - %2 = bitcast <2 x i32> %1 to x86_mmx - %3 = load i32, i32 *%a2 - %4 = insertelement <2 x i32> undef, i32 %3, i32 0 - %5 = bitcast <2 x i32> %4 to x86_mmx - %6 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %5) - %7 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a0, x86_mmx %6) - %8 = bitcast x86_mmx %6 to <2 x i32> - %9 = bitcast x86_mmx %7 to <2 x i32> - %10 = extractelement <2 x i32> %8, i32 0 - %11 = extractelement <2 x i32> %9, i32 0 - store i32 %10, i32* %a2 - ret i32 %11 -} - -define i64 @test_movdq2q(<2 x i64> %a0) optsize { -; GENERIC-LABEL: test_movdq2q: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00] -; GENERIC-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movdq2q: -; ATOM: # %bb.0: -; ATOM-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.50] -; ATOM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movdq2q: -; SLM: # %bb.0: -; SLM-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.50] -; SLM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_movdq2q: -; SANDY: # %bb.0: -; SANDY-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00] -; SANDY-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movdq2q: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movdq2q %xmm0, %mm0 # sched: [2:0.67] -; HASWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movdq2q: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movdq2q %xmm0, %mm0 # sched: [2:0.67] -; BROADWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movdq2q: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00] -; SKYLAKE-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movdq2q: -; SKX: # %bb.0: -; SKX-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00] -; SKX-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movdq2q: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movdq2q %xmm0, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: paddd %mm0, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movdq2q: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movdq2q: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: paddd %mm0, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = extractelement <2 x i64> %a0, i32 0 - %2 = bitcast i64 %1 to x86_mmx - %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} - -define void @test_movntq(x86_mmx* %a0, x86_mmx %a1) optsize { -; GENERIC-LABEL: test_movntq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movntq: -; ATOM: # %bb.0: -; ATOM-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movntq: -; SLM: # %bb.0: -; SLM-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_movntq: -; SANDY: # %bb.0: -; SANDY-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movntq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movntq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movntq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movntq: -; SKX: # %bb.0: -; SKX-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movntq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movntq %mm0, (%rdi) # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movntq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movntq %mm0, (%rdi) # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movntq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movntq %mm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.mmx.movnt.dq(x86_mmx* %a0, x86_mmx %a1) - ret void -} -declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind - -define void @test_movq(i64 *%a0) { -; GENERIC-LABEL: test_movq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] -; GENERIC-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movq: -; ATOM: # %bb.0: -; ATOM-NEXT: movq (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movq: -; SLM: # %bb.0: -; SLM-NEXT: movq (%rdi), %mm0 # sched: [3:1.00] -; SLM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] -; SLM-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_movq: -; SANDY: # %bb.0: -; SANDY-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] -; SANDY-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] -; SANDY-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] -; HASWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] -; BROADWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] -; SKYLAKE-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movq: -; SKX: # %bb.0: -; SKX-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] -; SKX-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] -; SKX-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] -; BDVER2-NEXT: paddd %mm0, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm0, (%rdi) # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movq (%rdi), %mm0 # sched: [5:1.00] -; BTVER2-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, (%rdi) # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movq (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: paddd %mm0, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64* %a0, align 8 - %2 = bitcast i64 %1 to x86_mmx - %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - store i64 %4, i64* %a0, align 8 - ret void -} - -define <2 x i64> @test_movq2dq(x86_mmx %a0) optsize { -; GENERIC-LABEL: test_movq2dq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movq2dq: -; ATOM: # %bb.0: -; ATOM-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movq2dq: -; SLM: # %bb.0: -; SLM-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_movq2dq: -; SANDY: # %bb.0: -; SANDY-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movq2dq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movq2dq %mm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movq2dq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movq2dq %mm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movq2dq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movq2dq %mm0, %xmm0 # sched: [2:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movq2dq: -; SKX: # %bb.0: -; SKX-NEXT: movq2dq %mm0, %xmm0 # sched: [2:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movq2dq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movq2dq %mm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movq2dq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movq2dq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast x86_mmx %a0 to i64 - %2 = insertelement <2 x i64> undef, i64 %1, i32 0 - ret <2 x i64> %2 -} - -define i64 @test_pabsb(x86_mmx *%a0) optsize { -; GENERIC-LABEL: test_pabsb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pabsb: -; ATOM: # %bb.0: -; ATOM-NEXT: pabsb (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pabsb: -; SLM: # %bb.0: -; SLM-NEXT: pabsb (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pabsb: -; SANDY: # %bb.0: -; SANDY-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pabsb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pabsb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pabsb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pabsb: -; SKX: # %bb.0: -; SKX-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pabsb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pabsb (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: pabsb %mm0, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pabsb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pabsb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pabsb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pabsb (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: pabsb %mm0, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load x86_mmx, x86_mmx *%a0, align 8 - %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) - %3 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone - -define i64 @test_pabsd(x86_mmx *%a0) optsize { -; GENERIC-LABEL: test_pabsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pabsd: -; ATOM: # %bb.0: -; ATOM-NEXT: pabsd (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pabsd: -; SLM: # %bb.0: -; SLM-NEXT: pabsd (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pabsd: -; SANDY: # %bb.0: -; SANDY-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pabsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pabsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pabsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pabsd: -; SKX: # %bb.0: -; SKX-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pabsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pabsd (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: pabsd %mm0, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pabsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pabsd (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pabsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pabsd (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: pabsd %mm0, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load x86_mmx, x86_mmx *%a0, align 8 - %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) - %3 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone - -define i64 @test_pabsw(x86_mmx *%a0) optsize { -; GENERIC-LABEL: test_pabsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pabsw: -; ATOM: # %bb.0: -; ATOM-NEXT: pabsw (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pabsw: -; SLM: # %bb.0: -; SLM-NEXT: pabsw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pabsw: -; SANDY: # %bb.0: -; SANDY-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pabsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pabsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pabsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pabsw: -; SKX: # %bb.0: -; SKX-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pabsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pabsw (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: pabsw %mm0, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pabsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pabsw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pabsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pabsw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: pabsw %mm0, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load x86_mmx, x86_mmx *%a0, align 8 - %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) - %3 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone - -define i64 @test_packssdw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_packssdw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_packssdw: -; ATOM: # %bb.0: -; ATOM-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: packssdw (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_packssdw: -; SLM: # %bb.0: -; SLM-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] -; SLM-NEXT: packssdw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_packssdw: -; SANDY: # %bb.0: -; SANDY-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_packssdw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] -; HASWELL-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_packssdw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_packssdw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_packssdw: -; SKX: # %bb.0: -; SKX-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] -; SKX-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_packssdw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: packssdw %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: packssdw (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_packssdw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_packssdw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50] -; ZNVER1-NEXT: packssdw (%rdi), %mm0 # sched: [1:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_packsswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_packsswb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_packsswb: -; ATOM: # %bb.0: -; ATOM-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: packsswb (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_packsswb: -; SLM: # %bb.0: -; SLM-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] -; SLM-NEXT: packsswb (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_packsswb: -; SANDY: # %bb.0: -; SANDY-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_packsswb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] -; HASWELL-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_packsswb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_packsswb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_packsswb: -; SKX: # %bb.0: -; SKX-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] -; SKX-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_packsswb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: packsswb %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: packsswb (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_packsswb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_packsswb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50] -; ZNVER1-NEXT: packsswb (%rdi), %mm0 # sched: [1:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_packuswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_packuswb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_packuswb: -; ATOM: # %bb.0: -; ATOM-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: packuswb (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_packuswb: -; SLM: # %bb.0: -; SLM-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] -; SLM-NEXT: packuswb (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_packuswb: -; SANDY: # %bb.0: -; SANDY-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_packuswb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] -; HASWELL-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_packuswb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_packuswb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_packuswb: -; SKX: # %bb.0: -; SKX-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] -; SKX-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_packuswb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: packuswb %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: packuswb (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_packuswb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_packuswb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50] -; ZNVER1-NEXT: packuswb (%rdi), %mm0 # sched: [1:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_paddb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_paddb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: paddb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddb: -; ATOM: # %bb.0: -; ATOM-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: paddb (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddb: -; SLM: # %bb.0: -; SLM-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: paddb (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_paddb: -; SANDY: # %bb.0: -; SANDY-NEXT: paddb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: paddb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddb: -; SKX: # %bb.0: -; SKX-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] -; SKX-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_paddb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: paddb %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: paddb (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_paddb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: paddb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_paddb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: paddb %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: paddb (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_paddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_paddd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddd %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: paddd (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddd: -; ATOM: # %bb.0: -; ATOM-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: paddd (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddd: -; SLM: # %bb.0: -; SLM-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: paddd (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_paddd: -; SANDY: # %bb.0: -; SANDY-NEXT: paddd %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: paddd (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddd: -; SKX: # %bb.0: -; SKX-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] -; SKX-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_paddd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: paddd %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: paddd (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_paddd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: paddd (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_paddd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: paddd %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: paddd (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_paddq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_paddq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] -; GENERIC-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddq: -; ATOM: # %bb.0: -; ATOM-NEXT: paddq %mm1, %mm0 # sched: [2:1.00] -; ATOM-NEXT: paddq (%rdi), %mm0 # sched: [3:1.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddq: -; SLM: # %bb.0: -; SLM-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: paddq (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_paddq: -; SANDY: # %bb.0: -; SANDY-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] -; SANDY-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddq: -; SKX: # %bb.0: -; SKX-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] -; SKX-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_paddq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: paddq %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_paddq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: paddq (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_paddq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: paddq %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: paddq (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_paddsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_paddsb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: paddsb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddsb: -; ATOM: # %bb.0: -; ATOM-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: paddsb (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddsb: -; SLM: # %bb.0: -; SLM-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: paddsb (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_paddsb: -; SANDY: # %bb.0: -; SANDY-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: paddsb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddsb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: paddsb (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddsb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddsb (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddsb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: paddsb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddsb: -; SKX: # %bb.0: -; SKX-NEXT: paddsb %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_paddsb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: paddsb %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: paddsb (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_paddsb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_paddsb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: paddsb %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: paddsb (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_paddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_paddsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: paddsw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddsw: -; ATOM: # %bb.0: -; ATOM-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: paddsw (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddsw: -; SLM: # %bb.0: -; SLM-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: paddsw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_paddsw: -; SANDY: # %bb.0: -; SANDY-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: paddsw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: paddsw (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddsw (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: paddsw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddsw: -; SKX: # %bb.0: -; SKX-NEXT: paddsw %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_paddsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: paddsw %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: paddsw (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_paddsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_paddsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: paddsw %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: paddsw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_paddusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_paddusb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: paddusb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddusb: -; ATOM: # %bb.0: -; ATOM-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: paddusb (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddusb: -; SLM: # %bb.0: -; SLM-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: paddusb (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_paddusb: -; SANDY: # %bb.0: -; SANDY-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: paddusb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddusb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: paddusb (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddusb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddusb (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddusb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: paddusb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddusb: -; SKX: # %bb.0: -; SKX-NEXT: paddusb %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_paddusb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: paddusb %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: paddusb (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_paddusb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_paddusb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: paddusb %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: paddusb (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_paddusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_paddusw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: paddusw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddusw: -; ATOM: # %bb.0: -; ATOM-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: paddusw (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddusw: -; SLM: # %bb.0: -; SLM-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: paddusw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_paddusw: -; SANDY: # %bb.0: -; SANDY-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: paddusw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddusw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: paddusw (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddusw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddusw (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddusw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: paddusw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddusw: -; SKX: # %bb.0: -; SKX-NEXT: paddusw %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_paddusw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: paddusw %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: paddusw (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_paddusw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_paddusw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: paddusw %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: paddusw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_paddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_paddw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: paddw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddw: -; ATOM: # %bb.0: -; ATOM-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: paddw (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddw: -; SLM: # %bb.0: -; SLM-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: paddw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_paddw: -; SANDY: # %bb.0: -; SANDY-NEXT: paddw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: paddw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_paddw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddw: -; SKX: # %bb.0: -; SKX-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] -; SKX-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_paddw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: paddw %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: paddw (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_paddw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: paddw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_paddw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: paddw %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: paddw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_palignr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_palignr: -; GENERIC: # %bb.0: -; GENERIC-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] -; GENERIC-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_palignr: -; ATOM: # %bb.0: -; ATOM-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] -; ATOM-NEXT: palignr $1, (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_palignr: -; SLM: # %bb.0: -; SLM-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] -; SLM-NEXT: palignr $1, (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_palignr: -; SANDY: # %bb.0: -; SANDY-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] -; SANDY-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_palignr: -; HASWELL: # %bb.0: -; HASWELL-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_palignr: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_palignr: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_palignr: -; SKX: # %bb.0: -; SKX-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_palignr: -; BDVER2: # %bb.0: -; BDVER2-NEXT: palignr $1, %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: palignr $1, (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_palignr: -; BTVER2: # %bb.0: -; BTVER2-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_palignr: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: palignr $1, (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a0, x86_mmx %a1, i8 1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %1, x86_mmx %2, i8 1) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone - -define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pand: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pand %mm1, %mm0 # sched: [1:0.33] -; GENERIC-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pand: -; ATOM: # %bb.0: -; ATOM-NEXT: pand %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: pand (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pand: -; SLM: # %bb.0: -; SLM-NEXT: pand %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: pand (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pand: -; SANDY: # %bb.0: -; SANDY-NEXT: pand %mm1, %mm0 # sched: [1:0.33] -; SANDY-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pand: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pand %mm1, %mm0 # sched: [1:0.33] -; HASWELL-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pand: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pand %mm1, %mm0 # sched: [1:0.33] -; BROADWELL-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pand: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pand %mm1, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pand: -; SKX: # %bb.0: -; SKX-NEXT: pand %mm1, %mm0 # sched: [1:0.50] -; SKX-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pand: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pand %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pand (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pand: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pand %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pand (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pand: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pand %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pand (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pandn: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] -; GENERIC-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pandn: -; ATOM: # %bb.0: -; ATOM-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: pandn (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pandn: -; SLM: # %bb.0: -; SLM-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: pandn (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pandn: -; SANDY: # %bb.0: -; SANDY-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] -; SANDY-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pandn: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] -; HASWELL-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pandn: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] -; BROADWELL-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pandn: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pandn: -; SKX: # %bb.0: -; SKX-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] -; SKX-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pandn: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pandn %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pandn (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pandn: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pandn (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pandn: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pandn %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pandn (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pavgb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pavgb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pavgb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pavgb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pavgb: -; ATOM: # %bb.0: -; ATOM-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: pavgb (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pavgb: -; SLM: # %bb.0: -; SLM-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: pavgb (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pavgb: -; SANDY: # %bb.0: -; SANDY-NEXT: pavgb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pavgb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pavgb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: pavgb (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pavgb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pavgb (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pavgb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pavgb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pavgb: -; SKX: # %bb.0: -; SKX-NEXT: pavgb %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pavgb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pavgb %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pavgb (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pavgb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pavgb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pavgb %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pavgb (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pavgw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pavgw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pavgw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pavgw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pavgw: -; ATOM: # %bb.0: -; ATOM-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: pavgw (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pavgw: -; SLM: # %bb.0: -; SLM-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: pavgw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pavgw: -; SANDY: # %bb.0: -; SANDY-NEXT: pavgw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pavgw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pavgw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: pavgw (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pavgw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pavgw (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pavgw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pavgw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pavgw: -; SKX: # %bb.0: -; SKX-NEXT: pavgw %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pavgw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pavgw %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pavgw (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pavgw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pavgw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pavgw %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pavgw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pcmpeqb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pcmpeqb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpeqb: -; ATOM: # %bb.0: -; ATOM-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: pcmpeqb (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpeqb: -; SLM: # %bb.0: -; SLM-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: pcmpeqb (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pcmpeqb: -; SANDY: # %bb.0: -; SANDY-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pcmpeqb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpeqb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpeqb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpeqb: -; SKX: # %bb.0: -; SKX-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pcmpeqb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pcmpeqb %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pcmpeqb (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pcmpeqb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pcmpeqb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pcmpeqd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pcmpeqd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpeqd: -; ATOM: # %bb.0: -; ATOM-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: pcmpeqd (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpeqd: -; SLM: # %bb.0: -; SLM-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: pcmpeqd (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pcmpeqd: -; SANDY: # %bb.0: -; SANDY-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pcmpeqd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpeqd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpeqd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpeqd: -; SKX: # %bb.0: -; SKX-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pcmpeqd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pcmpeqd %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pcmpeqd (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pcmpeqd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pcmpeqd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pcmpeqw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pcmpeqw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpeqw: -; ATOM: # %bb.0: -; ATOM-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: pcmpeqw (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpeqw: -; SLM: # %bb.0: -; SLM-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: pcmpeqw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pcmpeqw: -; SANDY: # %bb.0: -; SANDY-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pcmpeqw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpeqw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpeqw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpeqw: -; SKX: # %bb.0: -; SKX-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pcmpeqw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pcmpeqw %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pcmpeqw (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pcmpeqw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pcmpeqw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pcmpgtb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pcmpgtb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpgtb: -; ATOM: # %bb.0: -; ATOM-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: pcmpgtb (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpgtb: -; SLM: # %bb.0: -; SLM-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: pcmpgtb (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pcmpgtb: -; SANDY: # %bb.0: -; SANDY-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pcmpgtb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpgtb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpgtb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpgtb: -; SKX: # %bb.0: -; SKX-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pcmpgtb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pcmpgtb %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pcmpgtb (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pcmpgtb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pcmpgtb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pcmpgtd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pcmpgtd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpgtd: -; ATOM: # %bb.0: -; ATOM-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: pcmpgtd (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpgtd: -; SLM: # %bb.0: -; SLM-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: pcmpgtd (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pcmpgtd: -; SANDY: # %bb.0: -; SANDY-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pcmpgtd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpgtd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpgtd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpgtd: -; SKX: # %bb.0: -; SKX-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pcmpgtd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pcmpgtd %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pcmpgtd (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pcmpgtd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pcmpgtd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pcmpgtw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pcmpgtw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpgtw: -; ATOM: # %bb.0: -; ATOM-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: pcmpgtw (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpgtw: -; SLM: # %bb.0: -; SLM-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: pcmpgtw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pcmpgtw: -; SANDY: # %bb.0: -; SANDY-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pcmpgtw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpgtw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpgtw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpgtw: -; SKX: # %bb.0: -; SKX-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pcmpgtw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pcmpgtw %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pcmpgtw (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pcmpgtw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pcmpgtw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone - -define i32 @test_pextrw(x86_mmx %a0) optsize { -; GENERIC-LABEL: test_pextrw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pextrw: -; ATOM: # %bb.0: -; ATOM-NEXT: pextrw $0, %mm0, %eax # sched: [4:2.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pextrw: -; SLM: # %bb.0: -; SLM-NEXT: pextrw $0, %mm0, %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pextrw: -; SANDY: # %bb.0: -; SANDY-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pextrw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pextrw $0, %mm0, %eax # sched: [2:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pextrw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pextrw $0, %mm0, %eax # sched: [2:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pextrw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pextrw: -; SKX: # %bb.0: -; SKX-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pextrw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pextrw $0, %mm0, %eax # sched: [13:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pextrw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pextrw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pextrw $0, %mm0, %eax # sched: [2:2.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.mmx.pextr.w(x86_mmx %a0, i32 0) - ret i32 %1 -} -declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32) nounwind readnone - -define i64 @test_phaddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_phaddd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50] -; GENERIC-NEXT: phaddd (%rdi), %mm0 # sched: [8:1.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_phaddd: -; ATOM: # %bb.0: -; ATOM-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50] -; ATOM-NEXT: phaddd (%rdi), %mm0 # sched: [4:2.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_phaddd: -; SLM: # %bb.0: -; SLM-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: phaddd (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_phaddd: -; SANDY: # %bb.0: -; SANDY-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50] -; SANDY-NEXT: phaddd (%rdi), %mm0 # sched: [8:1.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_phaddd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] -; HASWELL-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phaddd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phaddd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phaddd: -; SKX: # %bb.0: -; SKX-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] -; SKX-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_phaddd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: phaddd %mm1, %mm0 # sched: [5:0.50] -; BDVER2-NEXT: phaddd (%rdi), %mm0 # sched: [10:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_phaddd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: phaddd (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_phaddd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: phaddd %mm1, %mm0 # sched: [100:0.25] -; ZNVER1-NEXT: phaddd (%rdi), %mm0 # sched: [100:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_phaddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_phaddsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: phaddsw %mm1, %mm0 # sched: [3:1.50] -; GENERIC-NEXT: phaddsw (%rdi), %mm0 # sched: [8:1.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_phaddsw: -; ATOM: # %bb.0: -; ATOM-NEXT: phaddsw %mm1, %mm0 # sched: [5:2.50] -; ATOM-NEXT: phaddsw (%rdi), %mm0 # sched: [6:3.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_phaddsw: -; SLM: # %bb.0: -; SLM-NEXT: phaddsw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: phaddsw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_phaddsw: -; SANDY: # %bb.0: -; SANDY-NEXT: phaddsw %mm1, %mm0 # sched: [3:1.50] -; SANDY-NEXT: phaddsw (%rdi), %mm0 # sched: [8:1.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_phaddsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] -; HASWELL-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phaddsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phaddsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phaddsw: -; SKX: # %bb.0: -; SKX-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] -; SKX-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_phaddsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: phaddsw %mm1, %mm0 # sched: [5:0.50] -; BDVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [10:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_phaddsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: phaddsw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_phaddsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: phaddsw %mm1, %mm0 # sched: [100:0.25] -; ZNVER1-NEXT: phaddsw (%rdi), %mm0 # sched: [100:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_phaddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_phaddw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: phaddw %mm1, %mm0 # sched: [3:1.50] -; GENERIC-NEXT: phaddw (%rdi), %mm0 # sched: [8:1.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_phaddw: -; ATOM: # %bb.0: -; ATOM-NEXT: phaddw %mm1, %mm0 # sched: [5:2.50] -; ATOM-NEXT: phaddw (%rdi), %mm0 # sched: [6:3.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_phaddw: -; SLM: # %bb.0: -; SLM-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: phaddw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_phaddw: -; SANDY: # %bb.0: -; SANDY-NEXT: phaddw %mm1, %mm0 # sched: [3:1.50] -; SANDY-NEXT: phaddw (%rdi), %mm0 # sched: [8:1.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_phaddw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] -; HASWELL-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phaddw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phaddw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phaddw: -; SKX: # %bb.0: -; SKX-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] -; SKX-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_phaddw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: phaddw %mm1, %mm0 # sched: [5:0.50] -; BDVER2-NEXT: phaddw (%rdi), %mm0 # sched: [10:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_phaddw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: phaddw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_phaddw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: phaddw %mm1, %mm0 # sched: [100:0.25] -; ZNVER1-NEXT: phaddw (%rdi), %mm0 # sched: [100:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_phsubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_phsubd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50] -; GENERIC-NEXT: phsubd (%rdi), %mm0 # sched: [8:1.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_phsubd: -; ATOM: # %bb.0: -; ATOM-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50] -; ATOM-NEXT: phsubd (%rdi), %mm0 # sched: [4:2.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_phsubd: -; SLM: # %bb.0: -; SLM-NEXT: phsubd %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: phsubd (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_phsubd: -; SANDY: # %bb.0: -; SANDY-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50] -; SANDY-NEXT: phsubd (%rdi), %mm0 # sched: [8:1.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_phsubd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] -; HASWELL-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phsubd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phsubd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phsubd: -; SKX: # %bb.0: -; SKX-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] -; SKX-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_phsubd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: phsubd %mm1, %mm0 # sched: [5:0.50] -; BDVER2-NEXT: phsubd (%rdi), %mm0 # sched: [10:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_phsubd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: phsubd %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: phsubd (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_phsubd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: phsubd %mm1, %mm0 # sched: [100:0.25] -; ZNVER1-NEXT: phsubd (%rdi), %mm0 # sched: [100:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_phsubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_phsubsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: phsubsw %mm1, %mm0 # sched: [3:1.50] -; GENERIC-NEXT: phsubsw (%rdi), %mm0 # sched: [8:1.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_phsubsw: -; ATOM: # %bb.0: -; ATOM-NEXT: phsubsw %mm1, %mm0 # sched: [5:2.50] -; ATOM-NEXT: phsubsw (%rdi), %mm0 # sched: [6:3.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_phsubsw: -; SLM: # %bb.0: -; SLM-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: phsubsw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_phsubsw: -; SANDY: # %bb.0: -; SANDY-NEXT: phsubsw %mm1, %mm0 # sched: [3:1.50] -; SANDY-NEXT: phsubsw (%rdi), %mm0 # sched: [8:1.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_phsubsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] -; HASWELL-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phsubsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phsubsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phsubsw: -; SKX: # %bb.0: -; SKX-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] -; SKX-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_phsubsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: phsubsw %mm1, %mm0 # sched: [5:0.50] -; BDVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [10:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_phsubsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_phsubsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: phsubsw %mm1, %mm0 # sched: [100:0.25] -; ZNVER1-NEXT: phsubsw (%rdi), %mm0 # sched: [100:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_phsubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_phsubw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: phsubw %mm1, %mm0 # sched: [3:1.50] -; GENERIC-NEXT: phsubw (%rdi), %mm0 # sched: [8:1.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_phsubw: -; ATOM: # %bb.0: -; ATOM-NEXT: phsubw %mm1, %mm0 # sched: [5:2.50] -; ATOM-NEXT: phsubw (%rdi), %mm0 # sched: [6:3.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_phsubw: -; SLM: # %bb.0: -; SLM-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: phsubw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_phsubw: -; SANDY: # %bb.0: -; SANDY-NEXT: phsubw %mm1, %mm0 # sched: [3:1.50] -; SANDY-NEXT: phsubw (%rdi), %mm0 # sched: [8:1.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_phsubw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] -; HASWELL-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phsubw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] -; BROADWELL-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phsubw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] -; SKYLAKE-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phsubw: -; SKX: # %bb.0: -; SKX-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] -; SKX-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_phsubw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: phsubw %mm1, %mm0 # sched: [5:0.50] -; BDVER2-NEXT: phsubw (%rdi), %mm0 # sched: [10:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_phsubw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: phsubw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_phsubw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: phsubw %mm1, %mm0 # sched: [100:0.25] -; ZNVER1-NEXT: phsubw (%rdi), %mm0 # sched: [100:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize { -; GENERIC-LABEL: test_pinsrw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00] -; GENERIC-NEXT: movswl (%rsi), %eax # sched: [5:0.50] -; GENERIC-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pinsrw: -; ATOM: # %bb.0: -; ATOM-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00] -; ATOM-NEXT: movswl (%rsi), %eax # sched: [1:1.00] -; ATOM-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pinsrw: -; SLM: # %bb.0: -; SLM-NEXT: movswl (%rsi), %eax # sched: [4:1.00] -; SLM-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00] -; SLM-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pinsrw: -; SANDY: # %bb.0: -; SANDY-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00] -; SANDY-NEXT: movswl (%rsi), %eax # sched: [5:0.50] -; SANDY-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pinsrw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] -; HASWELL-NEXT: movswl (%rsi), %eax # sched: [5:0.50] -; HASWELL-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pinsrw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] -; BROADWELL-NEXT: movswl (%rsi), %eax # sched: [5:0.50] -; BROADWELL-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pinsrw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] -; SKYLAKE-NEXT: movswl (%rsi), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pinsrw: -; SKX: # %bb.0: -; SKX-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] -; SKX-NEXT: movswl (%rsi), %eax # sched: [5:0.50] -; SKX-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pinsrw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movswl (%rsi), %eax # sched: [5:0.50] -; BDVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pinsrw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movswl (%rsi), %eax # sched: [4:1.00] -; BTVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [7:0.50] -; BTVER2-NEXT: pinsrw $1, %eax, %mm0 # sched: [7:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pinsrw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movswl (%rsi), %eax # sched: [8:0.50] -; ZNVER1-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %a0, i32 %a1, i32 0) - %2 = load i16, i16 *%a2, align 2 - %3 = sext i16 %2 to i32 - %4 = call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %1, i32 %3, i32 1) - %5 = bitcast x86_mmx %4 to i64 - ret i64 %5 -} -declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32) nounwind readnone - -define i64 @test_pmaddwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pmaddwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] -; GENERIC-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmaddwd: -; ATOM: # %bb.0: -; ATOM-NEXT: pmaddwd %mm1, %mm0 # sched: [4:4.00] -; ATOM-NEXT: pmaddwd (%rdi), %mm0 # sched: [4:4.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmaddwd: -; SLM: # %bb.0: -; SLM-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] -; SLM-NEXT: pmaddwd (%rdi), %mm0 # sched: [7:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pmaddwd: -; SANDY: # %bb.0: -; SANDY-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] -; SANDY-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaddwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] -; HASWELL-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaddwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaddwd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaddwd: -; SKX: # %bb.0: -; SKX-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] -; SKX-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pmaddwd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] -; BDVER2-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pmaddwd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pmaddwd %mm1, %mm0 # sched: [2:1.00] -; BTVER2-NEXT: pmaddwd (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pmaddwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] -; ZNVER1-NEXT: pmaddwd (%rdi), %mm0 # sched: [11:1.00] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pmaddubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pmaddubsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] -; GENERIC-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmaddubsw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:4.00] -; ATOM-NEXT: pmaddubsw (%rdi), %mm0 # sched: [4:4.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmaddubsw: -; SLM: # %bb.0: -; SLM-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] -; SLM-NEXT: pmaddubsw (%rdi), %mm0 # sched: [7:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pmaddubsw: -; SANDY: # %bb.0: -; SANDY-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] -; SANDY-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaddubsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] -; HASWELL-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaddubsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaddubsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: pmaddubsw (%rdi), %mm0 # sched: [9:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaddubsw: -; SKX: # %bb.0: -; SKX-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] -; SKX-NEXT: pmaddubsw (%rdi), %mm0 # sched: [9:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pmaddubsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] -; BDVER2-NEXT: pmaddubsw (%rdi), %mm0 # sched: [9:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pmaddubsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pmaddubsw %mm1, %mm0 # sched: [2:1.00] -; BTVER2-NEXT: pmaddubsw (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pmaddubsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] -; ZNVER1-NEXT: pmaddubsw (%rdi), %mm0 # sched: [11:1.00] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pmaxsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pmaxsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmaxsw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmaxsw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: pmaxsw (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmaxsw: -; SLM: # %bb.0: -; SLM-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: pmaxsw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pmaxsw: -; SANDY: # %bb.0: -; SANDY-NEXT: pmaxsw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaxsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pmaxsw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxsw: -; SKX: # %bb.0: -; SKX-NEXT: pmaxsw %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pmaxsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pmaxsw %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pmaxsw (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pmaxsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pmaxsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pmaxub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pmaxub: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmaxub %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pmaxub (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmaxub: -; ATOM: # %bb.0: -; ATOM-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: pmaxub (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmaxub: -; SLM: # %bb.0: -; SLM-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: pmaxub (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pmaxub: -; SANDY: # %bb.0: -; SANDY-NEXT: pmaxub %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pmaxub (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmaxub: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: pmaxub (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxub: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pmaxub (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxub: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pmaxub %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pmaxub (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxub: -; SKX: # %bb.0: -; SKX-NEXT: pmaxub %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: pmaxub (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pmaxub: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pmaxub %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pmaxub (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pmaxub: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pmaxub (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pmaxub: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pmaxub (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pminsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pminsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pminsw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pminsw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pminsw: -; ATOM: # %bb.0: -; ATOM-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: pminsw (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pminsw: -; SLM: # %bb.0: -; SLM-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: pminsw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pminsw: -; SANDY: # %bb.0: -; SANDY-NEXT: pminsw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pminsw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pminsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: pminsw (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pminsw (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pminsw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminsw: -; SKX: # %bb.0: -; SKX-NEXT: pminsw %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pminsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pminsw %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pminsw (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pminsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pminsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pminsw %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pminsw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pminub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pminub: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pminub %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: pminub (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pminub: -; ATOM: # %bb.0: -; ATOM-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: pminub (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pminub: -; SLM: # %bb.0: -; SLM-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: pminub (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pminub: -; SANDY: # %bb.0: -; SANDY-NEXT: pminub %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: pminub (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pminub: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: pminub (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminub: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: pminub (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminub: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pminub %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminub: -; SKX: # %bb.0: -; SKX-NEXT: pminub %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pminub: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pminub %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pminub (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pminub: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pminub: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pminub %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pminub (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone - -define i32 @test_pmovmskb(x86_mmx %a0) optsize { -; GENERIC-LABEL: test_pmovmskb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmovmskb: -; ATOM: # %bb.0: -; ATOM-NEXT: pmovmskb %mm0, %eax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmovmskb: -; SLM: # %bb.0: -; SLM-NEXT: pmovmskb %mm0, %eax # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pmovmskb: -; SANDY: # %bb.0: -; SANDY-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmovmskb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovmskb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovmskb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pmovmskb %mm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovmskb: -; SKX: # %bb.0: -; SKX-NEXT: pmovmskb %mm0, %eax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pmovmskb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pmovmskb %mm0, %eax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pmovmskb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pmovmskb %mm0, %eax # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pmovmskb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0) - ret i32 %1 -} -declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone - -define i64 @test_pmulhrsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pmulhrsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] -; GENERIC-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmulhrsw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:4.00] -; ATOM-NEXT: pmulhrsw (%rdi), %mm0 # sched: [4:4.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmulhrsw: -; SLM: # %bb.0: -; SLM-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] -; SLM-NEXT: pmulhrsw (%rdi), %mm0 # sched: [7:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pmulhrsw: -; SANDY: # %bb.0: -; SANDY-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] -; SANDY-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmulhrsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] -; HASWELL-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmulhrsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmulhrsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: pmulhrsw (%rdi), %mm0 # sched: [9:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmulhrsw: -; SKX: # %bb.0: -; SKX-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] -; SKX-NEXT: pmulhrsw (%rdi), %mm0 # sched: [9:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pmulhrsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] -; BDVER2-NEXT: pmulhrsw (%rdi), %mm0 # sched: [9:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pmulhrsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pmulhrsw %mm1, %mm0 # sched: [2:1.00] -; BTVER2-NEXT: pmulhrsw (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pmulhrsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] -; ZNVER1-NEXT: pmulhrsw (%rdi), %mm0 # sched: [11:1.00] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pmulhw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pmulhw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] -; GENERIC-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmulhw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmulhw %mm1, %mm0 # sched: [4:4.00] -; ATOM-NEXT: pmulhw (%rdi), %mm0 # sched: [4:4.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmulhw: -; SLM: # %bb.0: -; SLM-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] -; SLM-NEXT: pmulhw (%rdi), %mm0 # sched: [7:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pmulhw: -; SANDY: # %bb.0: -; SANDY-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] -; SANDY-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmulhw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] -; HASWELL-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmulhw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmulhw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmulhw: -; SKX: # %bb.0: -; SKX-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] -; SKX-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pmulhw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] -; BDVER2-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pmulhw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pmulhw %mm1, %mm0 # sched: [2:1.00] -; BTVER2-NEXT: pmulhw (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pmulhw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] -; ZNVER1-NEXT: pmulhw (%rdi), %mm0 # sched: [11:1.00] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pmulhuw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pmulhuw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] -; GENERIC-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmulhuw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmulhuw %mm1, %mm0 # sched: [4:4.00] -; ATOM-NEXT: pmulhuw (%rdi), %mm0 # sched: [4:4.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmulhuw: -; SLM: # %bb.0: -; SLM-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] -; SLM-NEXT: pmulhuw (%rdi), %mm0 # sched: [7:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pmulhuw: -; SANDY: # %bb.0: -; SANDY-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] -; SANDY-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmulhuw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] -; HASWELL-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmulhuw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmulhuw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmulhuw: -; SKX: # %bb.0: -; SKX-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] -; SKX-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pmulhuw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] -; BDVER2-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pmulhuw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pmulhuw %mm1, %mm0 # sched: [2:1.00] -; BTVER2-NEXT: pmulhuw (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pmulhuw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] -; ZNVER1-NEXT: pmulhuw (%rdi), %mm0 # sched: [11:1.00] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pmullw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pmullw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] -; GENERIC-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmullw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmullw %mm1, %mm0 # sched: [4:4.00] -; ATOM-NEXT: pmullw (%rdi), %mm0 # sched: [4:4.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmullw: -; SLM: # %bb.0: -; SLM-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] -; SLM-NEXT: pmullw (%rdi), %mm0 # sched: [7:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pmullw: -; SANDY: # %bb.0: -; SANDY-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] -; SANDY-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmullw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] -; HASWELL-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmullw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmullw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: pmullw (%rdi), %mm0 # sched: [9:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmullw: -; SKX: # %bb.0: -; SKX-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] -; SKX-NEXT: pmullw (%rdi), %mm0 # sched: [9:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pmullw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] -; BDVER2-NEXT: pmullw (%rdi), %mm0 # sched: [9:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pmullw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pmullw %mm1, %mm0 # sched: [2:1.00] -; BTVER2-NEXT: pmullw (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pmullw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] -; ZNVER1-NEXT: pmullw (%rdi), %mm0 # sched: [11:1.00] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pmuludq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pmuludq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] -; GENERIC-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmuludq: -; ATOM: # %bb.0: -; ATOM-NEXT: pmuludq %mm1, %mm0 # sched: [4:4.00] -; ATOM-NEXT: pmuludq (%rdi), %mm0 # sched: [4:4.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmuludq: -; SLM: # %bb.0: -; SLM-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] -; SLM-NEXT: pmuludq (%rdi), %mm0 # sched: [7:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pmuludq: -; SANDY: # %bb.0: -; SANDY-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] -; SANDY-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pmuludq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] -; HASWELL-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmuludq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmuludq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] -; SKYLAKE-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmuludq: -; SKX: # %bb.0: -; SKX-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] -; SKX-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pmuludq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] -; BDVER2-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pmuludq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pmuludq %mm1, %mm0 # sched: [2:1.00] -; BTVER2-NEXT: pmuludq (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pmuludq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] -; ZNVER1-NEXT: pmuludq (%rdi), %mm0 # sched: [11:1.00] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_por: -; GENERIC: # %bb.0: -; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; GENERIC-NEXT: por (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_por: -; ATOM: # %bb.0: -; ATOM-NEXT: por %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: por (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_por: -; SLM: # %bb.0: -; SLM-NEXT: por %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: por (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_por: -; SANDY: # %bb.0: -; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; SANDY-NEXT: por (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_por: -; HASWELL: # %bb.0: -; HASWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; HASWELL-NEXT: por (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_por: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; BROADWELL-NEXT: por (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_por: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: por %mm1, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: por (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_por: -; SKX: # %bb.0: -; SKX-NEXT: por %mm1, %mm0 # sched: [1:0.50] -; SKX-NEXT: por (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_por: -; BDVER2: # %bb.0: -; BDVER2-NEXT: por %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: por (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_por: -; BTVER2: # %bb.0: -; BTVER2-NEXT: por %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: por (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_por: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: por %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: por (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_psadbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psadbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] -; GENERIC-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psadbw: -; ATOM: # %bb.0: -; ATOM-NEXT: psadbw %mm1, %mm0 # sched: [4:2.00] -; ATOM-NEXT: psadbw (%rdi), %mm0 # sched: [4:2.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psadbw: -; SLM: # %bb.0: -; SLM-NEXT: psadbw %mm1, %mm0 # sched: [4:1.00] -; SLM-NEXT: psadbw (%rdi), %mm0 # sched: [7:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psadbw: -; SANDY: # %bb.0: -; SANDY-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] -; SANDY-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psadbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] -; HASWELL-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psadbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] -; BROADWELL-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psadbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psadbw %mm1, %mm0 # sched: [3:1.00] -; SKYLAKE-NEXT: psadbw (%rdi), %mm0 # sched: [8:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psadbw: -; SKX: # %bb.0: -; SKX-NEXT: psadbw %mm1, %mm0 # sched: [3:1.00] -; SKX-NEXT: psadbw (%rdi), %mm0 # sched: [8:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psadbw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psadbw %mm1, %mm0 # sched: [4:0.50] -; BDVER2-NEXT: psadbw (%rdi), %mm0 # sched: [9:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psadbw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psadbw %mm1, %mm0 # sched: [2:0.50] -; BTVER2-NEXT: psadbw (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psadbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psadbw %mm1, %mm0 # sched: [3:1.00] -; ZNVER1-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pshufb(x86_mmx %a0, x86_mmx %a1, x86_mmx *%a2) optsize { -; GENERIC-LABEL: test_pshufb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50] -; GENERIC-NEXT: pshufb (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pshufb: -; ATOM: # %bb.0: -; ATOM-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] -; ATOM-NEXT: pshufb (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pshufb: -; SLM: # %bb.0: -; SLM-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] -; SLM-NEXT: pshufb (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pshufb: -; SANDY: # %bb.0: -; SANDY-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50] -; SANDY-NEXT: pshufb (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pshufb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pshufb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pshufb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pshufb: -; SKX: # %bb.0: -; SKX-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pshufb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pshufb %mm1, %mm0 # sched: [3:2.00] -; BDVER2-NEXT: pshufb (%rdi), %mm0 # sched: [8:2.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pshufb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pshufb %mm1, %mm0 # sched: [2:0.50] -; BTVER2-NEXT: pshufb (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pshufb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pshufb %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pshufb (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pshufw(x86_mmx *%a0) optsize { -; GENERIC-LABEL: test_pshufw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] -; GENERIC-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pshufw: -; ATOM: # %bb.0: -; ATOM-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [1:1.00] -; ATOM-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pshufw: -; SLM: # %bb.0: -; SLM-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [4:1.00] -; SLM-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pshufw: -; SANDY: # %bb.0: -; SANDY-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] -; SANDY-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pshufw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] -; HASWELL-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pshufw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] -; BROADWELL-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pshufw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] -; SKYLAKE-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pshufw: -; SKX: # %bb.0: -; SKX-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] -; SKX-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pshufw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [7:0.50] -; BDVER2-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [2:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pshufw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] -; BTVER2-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pshufw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [8:0.50] -; ZNVER1-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load x86_mmx, x86_mmx *%a0, align 8 - %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 0) - %3 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %2, i8 0) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone - -define i64 @test_psignb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psignb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] -; GENERIC-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psignb: -; ATOM: # %bb.0: -; ATOM-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: psignb (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psignb: -; SLM: # %bb.0: -; SLM-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: psignb (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psignb: -; SANDY: # %bb.0: -; SANDY-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] -; SANDY-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psignb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psignb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psignb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psignb: -; SKX: # %bb.0: -; SKX-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] -; SKX-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psignb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psignb %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: psignb (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psignb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psignb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psignb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psignb %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psignb (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_psignd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psignd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] -; GENERIC-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psignd: -; ATOM: # %bb.0: -; ATOM-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: psignd (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psignd: -; SLM: # %bb.0: -; SLM-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: psignd (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psignd: -; SANDY: # %bb.0: -; SANDY-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] -; SANDY-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psignd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psignd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psignd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psignd: -; SKX: # %bb.0: -; SKX-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] -; SKX-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psignd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psignd %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: psignd (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psignd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psignd (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psignd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psignd %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psignd (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_psignw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psignw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] -; GENERIC-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psignw: -; ATOM: # %bb.0: -; ATOM-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: psignw (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psignw: -; SLM: # %bb.0: -; SLM-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: psignw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psignw: -; SANDY: # %bb.0: -; SANDY-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] -; SANDY-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psignw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psignw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psignw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psignw: -; SKX: # %bb.0: -; SKX-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] -; SKX-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psignw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psignw %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: psignw (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psignw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psignw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psignw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psignw %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psignw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pslld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pslld: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] -; GENERIC-NEXT: pslld $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pslld: -; ATOM: # %bb.0: -; ATOM-NEXT: pslld %mm1, %mm0 # sched: [2:1.00] -; ATOM-NEXT: pslld (%rdi), %mm0 # sched: [3:1.50] -; ATOM-NEXT: pslld $7, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pslld: -; SLM: # %bb.0: -; SLM-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] -; SLM-NEXT: pslld (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: pslld $7, %mm0 # sched: [1:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pslld: -; SANDY: # %bb.0: -; SANDY-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] -; SANDY-NEXT: pslld $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pslld: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] -; HASWELL-NEXT: pslld $7, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pslld: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] -; BROADWELL-NEXT: pslld $7, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pslld: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: pslld $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pslld: -; SKX: # %bb.0: -; SKX-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: pslld $7, %mm0 # sched: [1:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pslld: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pslld %mm1, %mm0 # sched: [3:0.50] -; BDVER2-NEXT: pslld (%rdi), %mm0 # sched: [8:0.50] -; BDVER2-NEXT: pslld $7, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pslld: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pslld %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: pslld $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pslld: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pslld %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pslld (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: pslld $7, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %1, x86_mmx %2) - %4 = call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %3, i32 7) - %5 = bitcast x86_mmx %4 to i64 - ret i64 %5 -} -declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone -declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone - -define i64 @test_psllq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psllq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] -; GENERIC-NEXT: psllq $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psllq: -; ATOM: # %bb.0: -; ATOM-NEXT: psllq %mm1, %mm0 # sched: [2:1.00] -; ATOM-NEXT: psllq (%rdi), %mm0 # sched: [3:1.50] -; ATOM-NEXT: psllq $7, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psllq: -; SLM: # %bb.0: -; SLM-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] -; SLM-NEXT: psllq (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: psllq $7, %mm0 # sched: [1:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psllq: -; SANDY: # %bb.0: -; SANDY-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] -; SANDY-NEXT: psllq $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psllq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] -; HASWELL-NEXT: psllq $7, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psllq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] -; BROADWELL-NEXT: psllq $7, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psllq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: psllq $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psllq: -; SKX: # %bb.0: -; SKX-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: psllq $7, %mm0 # sched: [1:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psllq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psllq %mm1, %mm0 # sched: [3:0.50] -; BDVER2-NEXT: psllq (%rdi), %mm0 # sched: [8:0.50] -; BDVER2-NEXT: psllq $7, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psllq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psllq %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: psllq $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psllq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psllq %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psllq (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: psllq $7, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %1, x86_mmx %2) - %4 = call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %3, i32 7) - %5 = bitcast x86_mmx %4 to i64 - ret i64 %5 -} -declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone -declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone - -define i64 @test_psllw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psllw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] -; GENERIC-NEXT: psllw $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psllw: -; ATOM: # %bb.0: -; ATOM-NEXT: psllw %mm1, %mm0 # sched: [2:1.00] -; ATOM-NEXT: psllw (%rdi), %mm0 # sched: [3:1.50] -; ATOM-NEXT: psllw $7, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psllw: -; SLM: # %bb.0: -; SLM-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] -; SLM-NEXT: psllw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: psllw $7, %mm0 # sched: [1:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psllw: -; SANDY: # %bb.0: -; SANDY-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] -; SANDY-NEXT: psllw $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psllw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] -; HASWELL-NEXT: psllw $7, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psllw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] -; BROADWELL-NEXT: psllw $7, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psllw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: psllw $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psllw: -; SKX: # %bb.0: -; SKX-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: psllw $7, %mm0 # sched: [1:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psllw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psllw %mm1, %mm0 # sched: [3:0.50] -; BDVER2-NEXT: psllw (%rdi), %mm0 # sched: [8:0.50] -; BDVER2-NEXT: psllw $7, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psllw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psllw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: psllw $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psllw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psllw %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psllw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: psllw $7, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %1, x86_mmx %2) - %4 = call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %3, i32 7) - %5 = bitcast x86_mmx %4 to i64 - ret i64 %5 -} -declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone -declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone - -define i64 @test_psrad(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psrad: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] -; GENERIC-NEXT: psrad $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psrad: -; ATOM: # %bb.0: -; ATOM-NEXT: psrad %mm1, %mm0 # sched: [2:1.00] -; ATOM-NEXT: psrad (%rdi), %mm0 # sched: [3:1.50] -; ATOM-NEXT: psrad $7, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psrad: -; SLM: # %bb.0: -; SLM-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] -; SLM-NEXT: psrad (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: psrad $7, %mm0 # sched: [1:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psrad: -; SANDY: # %bb.0: -; SANDY-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] -; SANDY-NEXT: psrad $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psrad: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] -; HASWELL-NEXT: psrad $7, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrad: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] -; BROADWELL-NEXT: psrad $7, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrad: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: psrad $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrad: -; SKX: # %bb.0: -; SKX-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: psrad $7, %mm0 # sched: [1:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psrad: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psrad %mm1, %mm0 # sched: [3:0.50] -; BDVER2-NEXT: psrad (%rdi), %mm0 # sched: [8:0.50] -; BDVER2-NEXT: psrad $7, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psrad: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psrad %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: psrad $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psrad: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psrad %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psrad (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: psrad $7, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %1, x86_mmx %2) - %4 = call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %3, i32 7) - %5 = bitcast x86_mmx %4 to i64 - ret i64 %5 -} -declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone -declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone - -define i64 @test_psraw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psraw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] -; GENERIC-NEXT: psraw $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psraw: -; ATOM: # %bb.0: -; ATOM-NEXT: psraw %mm1, %mm0 # sched: [2:1.00] -; ATOM-NEXT: psraw (%rdi), %mm0 # sched: [3:1.50] -; ATOM-NEXT: psraw $7, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psraw: -; SLM: # %bb.0: -; SLM-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] -; SLM-NEXT: psraw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: psraw $7, %mm0 # sched: [1:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psraw: -; SANDY: # %bb.0: -; SANDY-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] -; SANDY-NEXT: psraw $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psraw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] -; HASWELL-NEXT: psraw $7, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psraw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] -; BROADWELL-NEXT: psraw $7, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psraw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: psraw $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psraw: -; SKX: # %bb.0: -; SKX-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: psraw $7, %mm0 # sched: [1:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psraw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psraw %mm1, %mm0 # sched: [3:0.50] -; BDVER2-NEXT: psraw (%rdi), %mm0 # sched: [8:0.50] -; BDVER2-NEXT: psraw $7, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psraw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psraw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: psraw $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psraw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psraw %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psraw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: psraw $7, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %1, x86_mmx %2) - %4 = call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %3, i32 7) - %5 = bitcast x86_mmx %4 to i64 - ret i64 %5 -} -declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone -declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone - -define i64 @test_psrld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psrld: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] -; GENERIC-NEXT: psrld $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psrld: -; ATOM: # %bb.0: -; ATOM-NEXT: psrld %mm1, %mm0 # sched: [2:1.00] -; ATOM-NEXT: psrld (%rdi), %mm0 # sched: [3:1.50] -; ATOM-NEXT: psrld $7, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psrld: -; SLM: # %bb.0: -; SLM-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] -; SLM-NEXT: psrld (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: psrld $7, %mm0 # sched: [1:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psrld: -; SANDY: # %bb.0: -; SANDY-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] -; SANDY-NEXT: psrld $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psrld: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] -; HASWELL-NEXT: psrld $7, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrld: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] -; BROADWELL-NEXT: psrld $7, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrld: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: psrld $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrld: -; SKX: # %bb.0: -; SKX-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: psrld $7, %mm0 # sched: [1:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psrld: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psrld %mm1, %mm0 # sched: [3:0.50] -; BDVER2-NEXT: psrld (%rdi), %mm0 # sched: [8:0.50] -; BDVER2-NEXT: psrld $7, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psrld: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psrld %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: psrld $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psrld: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psrld %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psrld (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: psrld $7, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %1, x86_mmx %2) - %4 = call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %3, i32 7) - %5 = bitcast x86_mmx %4 to i64 - ret i64 %5 -} -declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone -declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone - -define i64 @test_psrlq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psrlq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] -; GENERIC-NEXT: psrlq $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psrlq: -; ATOM: # %bb.0: -; ATOM-NEXT: psrlq %mm1, %mm0 # sched: [2:1.00] -; ATOM-NEXT: psrlq (%rdi), %mm0 # sched: [3:1.50] -; ATOM-NEXT: psrlq $7, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psrlq: -; SLM: # %bb.0: -; SLM-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] -; SLM-NEXT: psrlq (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: psrlq $7, %mm0 # sched: [1:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psrlq: -; SANDY: # %bb.0: -; SANDY-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] -; SANDY-NEXT: psrlq $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psrlq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] -; HASWELL-NEXT: psrlq $7, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrlq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] -; BROADWELL-NEXT: psrlq $7, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrlq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: psrlq $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrlq: -; SKX: # %bb.0: -; SKX-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: psrlq $7, %mm0 # sched: [1:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psrlq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psrlq %mm1, %mm0 # sched: [3:0.50] -; BDVER2-NEXT: psrlq (%rdi), %mm0 # sched: [8:0.50] -; BDVER2-NEXT: psrlq $7, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psrlq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psrlq %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: psrlq $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psrlq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psrlq %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psrlq (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: psrlq $7, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %1, x86_mmx %2) - %4 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %3, i32 7) - %5 = bitcast x86_mmx %4 to i64 - ret i64 %5 -} -declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone -declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone - -define i64 @test_psrlw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psrlw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] -; GENERIC-NEXT: psrlw $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psrlw: -; ATOM: # %bb.0: -; ATOM-NEXT: psrlw %mm1, %mm0 # sched: [2:1.00] -; ATOM-NEXT: psrlw (%rdi), %mm0 # sched: [3:1.50] -; ATOM-NEXT: psrlw $7, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psrlw: -; SLM: # %bb.0: -; SLM-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] -; SLM-NEXT: psrlw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: psrlw $7, %mm0 # sched: [1:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psrlw: -; SANDY: # %bb.0: -; SANDY-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] -; SANDY-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] -; SANDY-NEXT: psrlw $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psrlw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] -; HASWELL-NEXT: psrlw $7, %mm0 # sched: [1:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrlw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] -; BROADWELL-NEXT: psrlw $7, %mm0 # sched: [1:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrlw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: psrlw $7, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrlw: -; SKX: # %bb.0: -; SKX-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: psrlw $7, %mm0 # sched: [1:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psrlw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psrlw %mm1, %mm0 # sched: [3:0.50] -; BDVER2-NEXT: psrlw (%rdi), %mm0 # sched: [8:0.50] -; BDVER2-NEXT: psrlw $7, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psrlw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psrlw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: psrlw $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psrlw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psrlw %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psrlw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: psrlw $7, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %1, x86_mmx %2) - %4 = call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %3, i32 7) - %5 = bitcast x86_mmx %4 to i64 - ret i64 %5 -} -declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone -declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone - -define i64 @test_psubb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psubb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubb: -; ATOM: # %bb.0: -; ATOM-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: psubb (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubb: -; SLM: # %bb.0: -; SLM-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: psubb (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psubb: -; SANDY: # %bb.0: -; SANDY-NEXT: psubb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubb: -; SKX: # %bb.0: -; SKX-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] -; SKX-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psubb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psubb %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: psubb (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psubb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psubb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psubb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psubb %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psubb (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_psubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psubd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubd %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubd (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubd: -; ATOM: # %bb.0: -; ATOM-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: psubd (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubd: -; SLM: # %bb.0: -; SLM-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: psubd (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psubd: -; SANDY: # %bb.0: -; SANDY-NEXT: psubd %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubd (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubd: -; SKX: # %bb.0: -; SKX-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] -; SKX-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psubd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psubd %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: psubd (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psubd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psubd (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psubd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psubd %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psubd (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_psubq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psubq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubq %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubq (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubq: -; ATOM: # %bb.0: -; ATOM-NEXT: psubq %mm1, %mm0 # sched: [2:1.00] -; ATOM-NEXT: psubq (%rdi), %mm0 # sched: [3:1.50] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubq: -; SLM: # %bb.0: -; SLM-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: psubq (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psubq: -; SANDY: # %bb.0: -; SANDY-NEXT: psubq %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubq (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubq: -; SKX: # %bb.0: -; SKX-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] -; SKX-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psubq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psubq %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: psubq (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psubq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psubq (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psubq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psubq %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psubq (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_psubsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psubsb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubsb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubsb: -; ATOM: # %bb.0: -; ATOM-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: psubsb (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubsb: -; SLM: # %bb.0: -; SLM-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: psubsb (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psubsb: -; SANDY: # %bb.0: -; SANDY-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubsb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubsb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: psubsb (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubsb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubsb (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubsb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psubsb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubsb: -; SKX: # %bb.0: -; SKX-NEXT: psubsb %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psubsb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psubsb %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: psubsb (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psubsb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psubsb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psubsb %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psubsb (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_psubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psubsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubsw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubsw: -; ATOM: # %bb.0: -; ATOM-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: psubsw (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubsw: -; SLM: # %bb.0: -; SLM-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: psubsw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psubsw: -; SANDY: # %bb.0: -; SANDY-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubsw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: psubsw (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubsw (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psubsw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubsw: -; SKX: # %bb.0: -; SKX-NEXT: psubsw %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psubsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psubsw %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: psubsw (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psubsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psubsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psubsw %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psubsw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_psubusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psubusb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubusb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubusb: -; ATOM: # %bb.0: -; ATOM-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: psubusb (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubusb: -; SLM: # %bb.0: -; SLM-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: psubusb (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psubusb: -; SANDY: # %bb.0: -; SANDY-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubusb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubusb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: psubusb (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubusb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubusb (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubusb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psubusb %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubusb: -; SKX: # %bb.0: -; SKX-NEXT: psubusb %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psubusb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psubusb %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: psubusb (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psubusb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psubusb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psubusb %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psubusb (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_psubusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psubusw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubusw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubusw: -; ATOM: # %bb.0: -; ATOM-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: psubusw (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubusw: -; SLM: # %bb.0: -; SLM-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: psubusw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psubusw: -; SANDY: # %bb.0: -; SANDY-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubusw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubusw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: psubusw (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubusw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubusw (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubusw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psubusw %mm1, %mm0 # sched: [1:1.00] -; SKYLAKE-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubusw: -; SKX: # %bb.0: -; SKX-NEXT: psubusw %mm1, %mm0 # sched: [1:1.00] -; SKX-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psubusw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psubusw %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: psubusw (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psubusw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psubusw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psubusw %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psubusw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_psubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_psubw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubw %mm1, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: psubw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubw: -; ATOM: # %bb.0: -; ATOM-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: psubw (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubw: -; SLM: # %bb.0: -; SLM-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: psubw (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_psubw: -; SANDY: # %bb.0: -; SANDY-NEXT: psubw %mm1, %mm0 # sched: [3:1.00] -; SANDY-NEXT: psubw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_psubw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] -; HASWELL-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] -; BROADWELL-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubw: -; SKX: # %bb.0: -; SKX-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] -; SKX-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_psubw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: psubw %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: psubw (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_psubw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: psubw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_psubw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: psubw %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: psubw (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_punpckhbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_punpckhbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] -; GENERIC-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpckhbw: -; ATOM: # %bb.0: -; ATOM-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50] -; ATOM-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpckhbw: -; SLM: # %bb.0: -; SLM-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] -; SLM-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_punpckhbw: -; SANDY: # %bb.0: -; SANDY-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] -; SANDY-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_punpckhbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] -; HASWELL-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckhbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] -; BROADWELL-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckhbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] -; SKYLAKE-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckhbw: -; SKX: # %bb.0: -; SKX-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] -; SKX-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_punpckhbw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [2:0.50] -; BDVER2-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_punpckhbw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50] -; BTVER2-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_punpckhbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.25] -; ZNVER1-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_punpckhdq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_punpckhdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] -; GENERIC-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpckhdq: -; ATOM: # %bb.0: -; ATOM-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50] -; ATOM-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpckhdq: -; SLM: # %bb.0: -; SLM-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] -; SLM-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_punpckhdq: -; SANDY: # %bb.0: -; SANDY-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] -; SANDY-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_punpckhdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] -; HASWELL-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckhdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] -; BROADWELL-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckhdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] -; SKYLAKE-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckhdq: -; SKX: # %bb.0: -; SKX-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] -; SKX-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_punpckhdq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [2:0.50] -; BDVER2-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_punpckhdq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50] -; BTVER2-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_punpckhdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.25] -; ZNVER1-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_punpckhwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_punpckhwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; GENERIC-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpckhwd: -; ATOM: # %bb.0: -; ATOM-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50] -; ATOM-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpckhwd: -; SLM: # %bb.0: -; SLM-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; SLM-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_punpckhwd: -; SANDY: # %bb.0: -; SANDY-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; SANDY-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_punpckhwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; HASWELL-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckhwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; BROADWELL-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckhwd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckhwd: -; SKX: # %bb.0: -; SKX-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; SKX-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_punpckhwd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [2:0.50] -; BDVER2-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_punpckhwd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50] -; BTVER2-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_punpckhwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25] -; ZNVER1-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_punpcklbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_punpcklbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; GENERIC-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpcklbw: -; ATOM: # %bb.0: -; ATOM-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; ATOM-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpcklbw: -; SLM: # %bb.0: -; SLM-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; SLM-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_punpcklbw: -; SANDY: # %bb.0: -; SANDY-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; SANDY-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_punpcklbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; HASWELL-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpcklbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; BROADWELL-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpcklbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpcklbw: -; SKX: # %bb.0: -; SKX-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; SKX-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_punpcklbw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [2:0.50] -; BDVER2-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_punpcklbw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50] -; BTVER2-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_punpcklbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25] -; ZNVER1-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_punpckldq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_punpckldq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] -; GENERIC-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpckldq: -; ATOM: # %bb.0: -; ATOM-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] -; ATOM-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpckldq: -; SLM: # %bb.0: -; SLM-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] -; SLM-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_punpckldq: -; SANDY: # %bb.0: -; SANDY-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] -; SANDY-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_punpckldq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] -; HASWELL-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckldq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] -; BROADWELL-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckldq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] -; SKYLAKE-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckldq: -; SKX: # %bb.0: -; SKX-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] -; SKX-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_punpckldq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [2:0.50] -; BDVER2-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_punpckldq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.50] -; BTVER2-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_punpckldq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.25] -; ZNVER1-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_punpcklwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_punpcklwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] -; GENERIC-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpcklwd: -; ATOM: # %bb.0: -; ATOM-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] -; ATOM-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpcklwd: -; SLM: # %bb.0: -; SLM-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] -; SLM-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_punpcklwd: -; SANDY: # %bb.0: -; SANDY-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] -; SANDY-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_punpcklwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] -; HASWELL-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpcklwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] -; BROADWELL-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpcklwd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] -; SKYLAKE-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpcklwd: -; SKX: # %bb.0: -; SKX-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] -; SKX-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_punpcklwd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [2:0.50] -; BDVER2-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_punpcklwd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.50] -; BTVER2-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_punpcklwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.25] -; ZNVER1-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone - -define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { -; GENERIC-LABEL: test_pxor: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pxor %mm1, %mm0 # sched: [1:0.33] -; GENERIC-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pxor: -; ATOM: # %bb.0: -; ATOM-NEXT: pxor %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: pxor (%rdi), %mm0 # sched: [1:1.00] -; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pxor: -; SLM: # %bb.0: -; SLM-NEXT: pxor %mm1, %mm0 # sched: [1:0.50] -; SLM-NEXT: pxor (%rdi), %mm0 # sched: [4:1.00] -; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pxor: -; SANDY: # %bb.0: -; SANDY-NEXT: pxor %mm1, %mm0 # sched: [1:0.33] -; SANDY-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pxor: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pxor %mm1, %mm0 # sched: [1:0.33] -; HASWELL-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50] -; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pxor: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pxor %mm1, %mm0 # sched: [1:0.33] -; BROADWELL-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50] -; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pxor: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pxor %mm1, %mm0 # sched: [1:0.50] -; SKYLAKE-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50] -; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pxor: -; SKX: # %bb.0: -; SKX-NEXT: pxor %mm1, %mm0 # sched: [1:0.50] -; SKX-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50] -; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pxor: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pxor %mm1, %mm0 # sched: [2:0.50] -; BDVER2-NEXT: pxor (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pxor: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pxor %mm1, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: pxor (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pxor: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pxor %mm1, %mm0 # sched: [1:0.25] -; ZNVER1-NEXT: pxor (%rdi), %mm0 # sched: [8:0.50] -; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a0, x86_mmx %a1) - %2 = load x86_mmx, x86_mmx *%a2, align 8 - %3 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %1, x86_mmx %2) - %4 = bitcast x86_mmx %3 to i64 - ret i64 %4 -} -declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone Index: test/CodeGen/X86/movbe-schedule.ll =================================================================== --- test/CodeGen/X86/movbe-schedule.ll +++ test/CodeGen/X86/movbe-schedule.ll @@ -1,190 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+movbe | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define i16 @test_movbe_i16(i16 *%a0, i16 %a1, i16 *%a2) { -; GENERIC-LABEL: test_movbe_i16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movbew (%rdi), %ax # sched: [6:0.50] -; GENERIC-NEXT: movbew %si, (%rdx) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movbe_i16: -; ATOM: # %bb.0: -; ATOM-NEXT: movbew (%rdi), %ax # sched: [1:1.00] -; ATOM-NEXT: movbew %si, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movbe_i16: -; SLM: # %bb.0: -; SLM-NEXT: movbew (%rdi), %ax # sched: [4:1.00] -; SLM-NEXT: movbew %si, (%rdx) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; HASWELL-LABEL: test_movbe_i16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movbew (%rdi), %ax # sched: [6:0.50] -; HASWELL-NEXT: movbew %si, (%rdx) # sched: [2:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movbe_i16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movbew (%rdi), %ax # sched: [6:0.50] -; BROADWELL-NEXT: movbew %si, (%rdx) # sched: [2:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movbe_i16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movbew (%rdi), %ax # sched: [6:0.50] -; SKYLAKE-NEXT: movbew %si, (%rdx) # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BTVER2-LABEL: test_movbe_i16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movbew (%rdi), %ax # sched: [4:1.00] -; BTVER2-NEXT: movbew %si, (%rdx) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movbe_i16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movbew (%rdi), %ax # sched: [5:0.50] -; ZNVER1-NEXT: movbew %si, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i16, i16 *%a0 - %2 = tail call i16 @llvm.bswap.i16( i16 %1 ) - %3 = tail call i16 @llvm.bswap.i16( i16 %a1 ) - store i16 %3, i16* %a2, align 2 - ret i16 %2 -} -declare i16 @llvm.bswap.i16(i16) - -define i32 @test_movbe_i32(i32 *%a0, i32 %a1, i32 *%a2) { -; GENERIC-LABEL: test_movbe_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movbel (%rdi), %eax # sched: [6:0.50] -; GENERIC-NEXT: movbel %esi, (%rdx) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movbe_i32: -; ATOM: # %bb.0: -; ATOM-NEXT: movbel (%rdi), %eax # sched: [1:1.00] -; ATOM-NEXT: movbel %esi, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movbe_i32: -; SLM: # %bb.0: -; SLM-NEXT: movbel (%rdi), %eax # sched: [4:1.00] -; SLM-NEXT: movbel %esi, (%rdx) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; HASWELL-LABEL: test_movbe_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movbel (%rdi), %eax # sched: [6:0.50] -; HASWELL-NEXT: movbel %esi, (%rdx) # sched: [2:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movbe_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movbel (%rdi), %eax # sched: [6:0.50] -; BROADWELL-NEXT: movbel %esi, (%rdx) # sched: [2:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movbe_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movbel (%rdi), %eax # sched: [6:0.50] -; SKYLAKE-NEXT: movbel %esi, (%rdx) # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BTVER2-LABEL: test_movbe_i32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movbel (%rdi), %eax # sched: [4:1.00] -; BTVER2-NEXT: movbel %esi, (%rdx) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movbe_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movbel (%rdi), %eax # sched: [5:0.50] -; ZNVER1-NEXT: movbel %esi, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a0 - %2 = tail call i32 @llvm.bswap.i32( i32 %1 ) - %3 = tail call i32 @llvm.bswap.i32( i32 %a1 ) - store i32 %3, i32* %a2, align 2 - ret i32 %2 -} -declare i32 @llvm.bswap.i32(i32) - -define i64 @test_movbe_i64(i64 *%a0, i64 %a1, i64 *%a2) { -; GENERIC-LABEL: test_movbe_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movbeq (%rdi), %rax # sched: [6:0.50] -; GENERIC-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movbe_i64: -; ATOM: # %bb.0: -; ATOM-NEXT: movbeq (%rdi), %rax # sched: [1:1.00] -; ATOM-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movbe_i64: -; SLM: # %bb.0: -; SLM-NEXT: movbeq (%rdi), %rax # sched: [4:1.00] -; SLM-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; HASWELL-LABEL: test_movbe_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movbeq (%rdi), %rax # sched: [6:0.50] -; HASWELL-NEXT: movbeq %rsi, (%rdx) # sched: [2:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movbe_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movbeq (%rdi), %rax # sched: [6:0.50] -; BROADWELL-NEXT: movbeq %rsi, (%rdx) # sched: [2:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movbe_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movbeq (%rdi), %rax # sched: [6:0.50] -; SKYLAKE-NEXT: movbeq %rsi, (%rdx) # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BTVER2-LABEL: test_movbe_i64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movbeq (%rdi), %rax # sched: [4:1.00] -; BTVER2-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movbe_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movbeq (%rdi), %rax # sched: [5:0.50] -; ZNVER1-NEXT: movbeq %rsi, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a0 - %2 = tail call i64 @llvm.bswap.i64( i64 %1 ) - %3 = tail call i64 @llvm.bswap.i64( i64 %a1 ) - store i64 %3, i64* %a2, align 2 - ret i64 %2 -} -declare i64 @llvm.bswap.i64(i64) Index: test/CodeGen/X86/mul-constant-i32.ll =================================================================== --- test/CodeGen/X86/mul-constant-i32.ll +++ test/CodeGen/X86/mul-constant-i32.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW -; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell | FileCheck %s --check-prefix=X64-HSW +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=btver2 | FileCheck %s --check-prefix=X64-JAG ; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT -; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT -; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT -; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM -; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -mcpu=haswell | FileCheck %s --check-prefix=HSW-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -mcpu=btver2 | FileCheck %s --check-prefix=JAG-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=slm | FileCheck %s --check-prefix=X64-SLM +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -mcpu=slm | FileCheck %s --check-prefix=SLM-NOOPT define i32 @test_mul_by_1(i32 %x) { ; X86-LABEL: test_mul_by_1: @@ -16,13 +16,13 @@ ; ; X64-HSW-LABEL: test_mul_by_1: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movl %edi, %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_1: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movl %edi, %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_1: ; X86-NOOPT: # %bb.0: @@ -31,23 +31,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_1: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: movl %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_1: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: movl %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_1: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movl %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_1: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: movl %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 1 ret i32 %mul } @@ -62,14 +62,14 @@ ; X64-HSW-LABEL: test_mul_by_2: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_2: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_2: ; X86-NOOPT: # %bb.0: @@ -80,26 +80,26 @@ ; HSW-NOOPT-LABEL: test_mul_by_2: ; HSW-NOOPT: # %bb.0: ; HSW-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_2: ; JAG-NOOPT: # %bb.0: ; JAG-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; JAG-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: leal (%rdi,%rdi), %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_2: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_2: ; SLM-NOOPT: # %bb.0: ; SLM-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: leal (%rdi,%rdi), %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 2 ret i32 %mul } @@ -114,14 +114,14 @@ ; X64-HSW-LABEL: test_mul_by_3: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_3: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_3: ; X86-NOOPT: # %bb.0: @@ -131,26 +131,26 @@ ; HSW-NOOPT-LABEL: test_mul_by_3: ; HSW-NOOPT: # %bb.0: ; HSW-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_3: ; JAG-NOOPT: # %bb.0: ; JAG-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; JAG-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_3: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_3: ; SLM-NOOPT: # %bb.0: ; SLM-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 3 ret i32 %mul } @@ -165,14 +165,14 @@ ; X64-HSW-LABEL: test_mul_by_4: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (,%rdi,4), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_4: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (,%rdi,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (,%rdi,4), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_4: ; X86-NOOPT: # %bb.0: @@ -183,26 +183,26 @@ ; HSW-NOOPT-LABEL: test_mul_by_4: ; HSW-NOOPT: # %bb.0: ; HSW-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_4: ; JAG-NOOPT: # %bb.0: ; JAG-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; JAG-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [2:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: leal (,%rdi,4), %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_4: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leal (,%rdi,4), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_4: ; SLM-NOOPT: # %bb.0: ; SLM-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: leal (,%rdi,4), %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 4 ret i32 %mul } @@ -217,14 +217,14 @@ ; X64-HSW-LABEL: test_mul_by_5: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_5: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_5: ; X86-NOOPT: # %bb.0: @@ -234,26 +234,26 @@ ; HSW-NOOPT-LABEL: test_mul_by_5: ; HSW-NOOPT: # %bb.0: ; HSW-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_5: ; JAG-NOOPT: # %bb.0: ; JAG-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; JAG-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_5: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_5: ; SLM-NOOPT: # %bb.0: ; SLM-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 5 ret i32 %mul } @@ -269,16 +269,16 @@ ; X64-HSW-LABEL: test_mul_by_6: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] -; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: addl %edi, %edi +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_6: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50] -; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: addl %edi, %edi +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_6: ; X86-NOOPT: # %bb.0: @@ -287,25 +287,25 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_6: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $6, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_6: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $6, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_6: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50] -; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: addl %edi, %edi +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_6: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $6, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 6 ret i32 %mul } @@ -321,16 +321,16 @@ ; X64-HSW-LABEL: test_mul_by_7: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (,%rdi,8), %eax +; X64-HSW-NEXT: subl %edi, %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_7: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [2:1.00] -; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (,%rdi,8), %eax +; X64-JAG-NEXT: subl %edi, %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_7: ; X86-NOOPT: # %bb.0: @@ -339,25 +339,25 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_7: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $7, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_7: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $7, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_7: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00] -; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leal (,%rdi,8), %eax +; X64-SLM-NEXT: subl %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_7: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $7, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 7 ret i32 %mul } @@ -372,14 +372,14 @@ ; X64-HSW-LABEL: test_mul_by_8: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (,%rdi,8), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_8: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (,%rdi,8), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_8: ; X86-NOOPT: # %bb.0: @@ -390,26 +390,26 @@ ; HSW-NOOPT-LABEL: test_mul_by_8: ; HSW-NOOPT: # %bb.0: ; HSW-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; HSW-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: leal (,%rdi,8), %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_8: ; JAG-NOOPT: # %bb.0: ; JAG-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; JAG-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [2:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: leal (,%rdi,8), %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_8: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leal (,%rdi,8), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_8: ; SLM-NOOPT: # %bb.0: ; SLM-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: leal (,%rdi,8), %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 8 ret i32 %mul } @@ -424,14 +424,14 @@ ; X64-HSW-LABEL: test_mul_by_9: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_9: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_9: ; X86-NOOPT: # %bb.0: @@ -441,26 +441,26 @@ ; HSW-NOOPT-LABEL: test_mul_by_9: ; HSW-NOOPT: # %bb.0: ; HSW-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_9: ; JAG-NOOPT: # %bb.0: ; JAG-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; JAG-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_9: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_9: ; SLM-NOOPT: # %bb.0: ; SLM-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 9 ret i32 %mul } @@ -476,16 +476,16 @@ ; X64-HSW-LABEL: test_mul_by_10: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] -; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: addl %edi, %edi +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_10: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50] -; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: addl %edi, %edi +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_10: ; X86-NOOPT: # %bb.0: @@ -494,25 +494,25 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_10: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $10, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_10: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $10, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_10: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50] -; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: addl %edi, %edi +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_10: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $10, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 10 ret i32 %mul } @@ -528,16 +528,16 @@ ; X64-HSW-LABEL: test_mul_by_11: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax +; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_11: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: leal (%rdi,%rax,2), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax +; X64-JAG-NEXT: leal (%rdi,%rax,2), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_11: ; X86-NOOPT: # %bb.0: @@ -546,23 +546,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_11: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $11, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_11: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $11, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_11: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imull $11, %edi, %eax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imull $11, %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_11: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $11, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 11 ret i32 %mul } @@ -578,16 +578,16 @@ ; X64-HSW-LABEL: test_mul_by_12: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: shll $2, %edi +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_12: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50] -; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: shll $2, %edi +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_12: ; X86-NOOPT: # %bb.0: @@ -596,25 +596,25 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_12: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $12, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_12: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $12, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_12: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00] -; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: shll $2, %edi +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_12: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $12, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 12 ret i32 %mul } @@ -630,16 +630,16 @@ ; X64-HSW-LABEL: test_mul_by_13: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_13: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00] -; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_13: ; X86-NOOPT: # %bb.0: @@ -648,23 +648,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_13: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $13, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_13: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $13, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_13: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imull $13, %edi, %eax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imull $13, %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_13: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $13, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 13 ret i32 %mul } @@ -681,19 +681,19 @@ ; ; X64-HSW-LABEL: test_mul_by_14: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50] -; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movl %edi, %eax +; X64-HSW-NEXT: shll $4, %eax +; X64-HSW-NEXT: subl %edi, %eax +; X64-HSW-NEXT: subl %edi, %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_14: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50] -; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movl %edi, %eax +; X64-JAG-NEXT: shll $4, %eax +; X64-JAG-NEXT: subl %edi, %eax +; X64-JAG-NEXT: subl %edi, %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_14: ; X86-NOOPT: # %bb.0: @@ -702,26 +702,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_14: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $14, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_14: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $14, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_14: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00] -; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movl %edi, %eax +; X64-SLM-NEXT: shll $4, %eax +; X64-SLM-NEXT: subl %edi, %eax +; X64-SLM-NEXT: subl %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_14: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $14, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 14 ret i32 %mul } @@ -737,16 +737,16 @@ ; X64-HSW-LABEL: test_mul_by_15: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_15: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_15: ; X86-NOOPT: # %bb.0: @@ -755,25 +755,25 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_15: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $15, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_15: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $15, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_15: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] -; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax +; X64-SLM-NEXT: leal (%rax,%rax,2), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_15: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $15, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 15 ret i32 %mul } @@ -787,15 +787,15 @@ ; ; X64-HSW-LABEL: test_mul_by_16: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movl %edi, %eax +; X64-HSW-NEXT: shll $4, %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movl %edi, %eax +; X64-JAG-NEXT: shll $4, %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_16: ; X86-NOOPT: # %bb.0: @@ -805,27 +805,27 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_16: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] -; HSW-NOOPT-NEXT: shll $4, %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: movl %edi, %eax +; HSW-NOOPT-NEXT: shll $4, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] -; JAG-NOOPT-NEXT: shll $4, %eax # sched: [1:0.50] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: movl %edi, %eax +; JAG-NOOPT-NEXT: shll $4, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_16: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movl %edi, %eax +; X64-SLM-NEXT: shll $4, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_16: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] -; SLM-NOOPT-NEXT: shll $4, %eax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: movl %edi, %eax +; SLM-NOOPT-NEXT: shll $4, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 16 ret i32 %mul } @@ -842,18 +842,18 @@ ; X64-HSW-LABEL: test_mul_by_17: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movl %edi, %eax +; X64-HSW-NEXT: shll $4, %eax +; X64-HSW-NEXT: leal (%rax,%rdi), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_17: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50] -; X64-JAG-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movl %edi, %eax +; X64-JAG-NEXT: shll $4, %eax +; X64-JAG-NEXT: leal (%rax,%rdi), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_17: ; X86-NOOPT: # %bb.0: @@ -862,26 +862,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_17: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $17, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_17: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $17, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_17: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00] -; X64-SLM-NEXT: leal (%rax,%rdi), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movl %edi, %eax +; X64-SLM-NEXT: shll $4, %eax +; X64-SLM-NEXT: leal (%rax,%rdi), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_17: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $17, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 17 ret i32 %mul } @@ -897,16 +897,16 @@ ; X64-HSW-LABEL: test_mul_by_18: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] -; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: addl %edi, %edi +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_18: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50] -; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: addl %edi, %edi +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_18: ; X86-NOOPT: # %bb.0: @@ -915,25 +915,25 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_18: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $18, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_18: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $18, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_18: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50] -; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: addl %edi, %edi +; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_18: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $18, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 18 ret i32 %mul } @@ -949,16 +949,16 @@ ; X64-HSW-LABEL: test_mul_by_19: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax +; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_19: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00] -; X64-JAG-NEXT: leal (%rdi,%rax,2), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax +; X64-JAG-NEXT: leal (%rdi,%rax,2), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_19: ; X86-NOOPT: # %bb.0: @@ -967,23 +967,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_19: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $19, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_19: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $19, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_19: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imull $19, %edi, %eax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imull $19, %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_19: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $19, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 19 ret i32 %mul } @@ -999,16 +999,16 @@ ; X64-HSW-LABEL: test_mul_by_20: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: shll $2, %edi +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_20: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50] -; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: shll $2, %edi +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_20: ; X86-NOOPT: # %bb.0: @@ -1017,25 +1017,25 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_20: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $20, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_20: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $20, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_20: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00] -; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: shll $2, %edi +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_20: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $20, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 20 ret i32 %mul } @@ -1051,16 +1051,16 @@ ; X64-HSW-LABEL: test_mul_by_21: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_21: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_21: ; X86-NOOPT: # %bb.0: @@ -1069,23 +1069,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_21: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $21, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_21: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $21, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_21: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imull $21, %edi, %eax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imull $21, %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_21: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $21, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 21 ret i32 %mul } @@ -1102,18 +1102,18 @@ ; X64-HSW-LABEL: test_mul_by_22: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax +; X64-HSW-NEXT: addl %edi, %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_22: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax +; X64-JAG-NEXT: addl %edi, %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_22: ; X86-NOOPT: # %bb.0: @@ -1122,23 +1122,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_22: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $22, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_22: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $22, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_22: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imull $22, %edi, %eax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imull $22, %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_22: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $22, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 22 ret i32 %mul } @@ -1155,18 +1155,18 @@ ; X64-HSW-LABEL: test_mul_by_23: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: shll $3, %eax # sched: [1:0.50] -; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax +; X64-HSW-NEXT: shll $3, %eax +; X64-HSW-NEXT: subl %edi, %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_23: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00] -; X64-JAG-NEXT: shll $3, %eax # sched: [1:0.50] -; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax +; X64-JAG-NEXT: shll $3, %eax +; X64-JAG-NEXT: subl %edi, %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_23: ; X86-NOOPT: # %bb.0: @@ -1175,23 +1175,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_23: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $23, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_23: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $23, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_23: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imull $23, %edi, %eax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imull $23, %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_23: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $23, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 23 ret i32 %mul } @@ -1207,16 +1207,16 @@ ; X64-HSW-LABEL: test_mul_by_24: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: shll $3, %edi # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: shll $3, %edi +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_24: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: shll $3, %edi # sched: [1:0.50] -; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: shll $3, %edi +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_24: ; X86-NOOPT: # %bb.0: @@ -1225,25 +1225,25 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_24: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $24, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_24: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $24, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_24: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: shll $3, %edi # sched: [1:1.00] -; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: shll $3, %edi +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_24: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $24, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 24 ret i32 %mul } @@ -1259,16 +1259,16 @@ ; X64-HSW-LABEL: test_mul_by_25: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_25: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: leal (%rax,%rax,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax +; X64-JAG-NEXT: leal (%rax,%rax,4), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_25: ; X86-NOOPT: # %bb.0: @@ -1277,25 +1277,25 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_25: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $25, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_25: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $25, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_25: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] -; X64-SLM-NEXT: leal (%rax,%rax,4), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax +; X64-SLM-NEXT: leal (%rax,%rax,4), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_25: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $25, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 25 ret i32 %mul } @@ -1312,18 +1312,18 @@ ; X64-HSW-LABEL: test_mul_by_26: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax +; X64-HSW-NEXT: addl %edi, %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_26: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: leal (%rax,%rax,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax +; X64-JAG-NEXT: leal (%rax,%rax,4), %eax +; X64-JAG-NEXT: addl %edi, %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_26: ; X86-NOOPT: # %bb.0: @@ -1332,23 +1332,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_26: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $26, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_26: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $26, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_26: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imull $26, %edi, %eax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imull $26, %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_26: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $26, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 26 ret i32 %mul } @@ -1364,16 +1364,16 @@ ; X64-HSW-LABEL: test_mul_by_27: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_27: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00] -; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_27: ; X86-NOOPT: # %bb.0: @@ -1382,25 +1382,25 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_27: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $27, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_27: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $27, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_27: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] -; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax +; X64-SLM-NEXT: leal (%rax,%rax,2), %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_27: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $27, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 27 ret i32 %mul } @@ -1417,18 +1417,18 @@ ; X64-HSW-LABEL: test_mul_by_28: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: addl %edi, %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_28: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00] -; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [2:1.00] -; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax +; X64-JAG-NEXT: addl %edi, %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_28: ; X86-NOOPT: # %bb.0: @@ -1437,23 +1437,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_28: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $28, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_28: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $28, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_28: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imull $28, %edi, %eax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imull $28, %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_28: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $28, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 28 ret i32 %mul } @@ -1471,20 +1471,20 @@ ; X64-HSW-LABEL: test_mul_by_29: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: addl %edi, %eax +; X64-HSW-NEXT: addl %edi, %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_29: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00] -; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [2:1.00] -; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax +; X64-JAG-NEXT: addl %edi, %eax +; X64-JAG-NEXT: addl %edi, %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_29: ; X86-NOOPT: # %bb.0: @@ -1493,23 +1493,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_29: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $29, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_29: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $29, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_29: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imull $29, %edi, %eax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imull $29, %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_29: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $29, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 29 ret i32 %mul } @@ -1526,19 +1526,19 @@ ; ; X64-HSW-LABEL: test_mul_by_30: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] -; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movl %edi, %eax +; X64-HSW-NEXT: shll $5, %eax +; X64-HSW-NEXT: subl %edi, %eax +; X64-HSW-NEXT: subl %edi, %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_30: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50] -; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movl %edi, %eax +; X64-JAG-NEXT: shll $5, %eax +; X64-JAG-NEXT: subl %edi, %eax +; X64-JAG-NEXT: subl %edi, %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_30: ; X86-NOOPT: # %bb.0: @@ -1547,26 +1547,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_30: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $30, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_30: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $30, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_30: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00] -; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movl %edi, %eax +; X64-SLM-NEXT: shll $5, %eax +; X64-SLM-NEXT: subl %edi, %eax +; X64-SLM-NEXT: subl %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_30: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $30, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 30 ret i32 %mul } @@ -1582,17 +1582,17 @@ ; ; X64-HSW-LABEL: test_mul_by_31: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] -; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movl %edi, %eax +; X64-HSW-NEXT: shll $5, %eax +; X64-HSW-NEXT: subl %edi, %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_31: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50] -; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movl %edi, %eax +; X64-JAG-NEXT: shll $5, %eax +; X64-JAG-NEXT: subl %edi, %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_31: ; X86-NOOPT: # %bb.0: @@ -1601,25 +1601,25 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_31: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $31, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_31: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $31, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_31: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00] -; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movl %edi, %eax +; X64-SLM-NEXT: shll $5, %eax +; X64-SLM-NEXT: subl %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_31: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $31, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 31 ret i32 %mul } @@ -1633,15 +1633,15 @@ ; ; X64-HSW-LABEL: test_mul_by_32: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movl %edi, %eax +; X64-HSW-NEXT: shll $5, %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movl %edi, %eax +; X64-JAG-NEXT: shll $5, %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_32: ; X86-NOOPT: # %bb.0: @@ -1651,27 +1651,27 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_32: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] -; HSW-NOOPT-NEXT: shll $5, %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: movl %edi, %eax +; HSW-NOOPT-NEXT: shll $5, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] -; JAG-NOOPT-NEXT: shll $5, %eax # sched: [1:0.50] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: movl %edi, %eax +; JAG-NOOPT-NEXT: shll $5, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_32: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movl %edi, %eax +; X64-SLM-NEXT: shll $5, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_32: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] -; SLM-NOOPT-NEXT: shll $5, %eax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: movl %edi, %eax +; SLM-NOOPT-NEXT: shll $5, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 32 ret i32 %mul } @@ -1687,16 +1687,16 @@ ; X64-HSW-LABEL: test_mul_by_37: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_37: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00] -; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_37: ; X86-NOOPT: # %bb.0: @@ -1705,23 +1705,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_37: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $37, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $37, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_37: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $37, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $37, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_37: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imull $37, %edi, %eax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imull $37, %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_37: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $37, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $37, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 37 ret i32 %mul } @@ -1737,16 +1737,16 @@ ; X64-HSW-LABEL: test_mul_by_41: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rdi,%rax,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax +; X64-HSW-NEXT: leal (%rdi,%rax,8), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_41: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: leal (%rdi,%rax,8), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax +; X64-JAG-NEXT: leal (%rdi,%rax,8), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_41: ; X86-NOOPT: # %bb.0: @@ -1755,23 +1755,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_41: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $41, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $41, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_41: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $41, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $41, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_41: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imull $41, %edi, %eax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imull $41, %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_41: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $41, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $41, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 41 ret i32 %mul } @@ -1788,19 +1788,19 @@ ; ; X64-HSW-LABEL: test_mul_by_62: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: shll $6, %eax # sched: [1:0.50] -; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movl %edi, %eax +; X64-HSW-NEXT: shll $6, %eax +; X64-HSW-NEXT: subl %edi, %eax +; X64-HSW-NEXT: subl %edi, %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_62: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: shll $6, %eax # sched: [1:0.50] -; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movl %edi, %eax +; X64-JAG-NEXT: shll $6, %eax +; X64-JAG-NEXT: subl %edi, %eax +; X64-JAG-NEXT: subl %edi, %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_62: ; X86-NOOPT: # %bb.0: @@ -1809,26 +1809,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_62: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $62, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $62, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_62: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $62, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $62, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_62: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: shll $6, %eax # sched: [1:1.00] -; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movl %edi, %eax +; X64-SLM-NEXT: shll $6, %eax +; X64-SLM-NEXT: subl %edi, %eax +; X64-SLM-NEXT: subl %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_62: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $62, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $62, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 62 ret i32 %mul } @@ -1845,18 +1845,18 @@ ; X64-HSW-LABEL: test_mul_by_66: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: shll $6, %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movl %edi, %eax +; X64-HSW-NEXT: shll $6, %eax +; X64-HSW-NEXT: leal (%rax,%rdi,2), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_66: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: shll $6, %eax # sched: [1:0.50] -; X64-JAG-NEXT: leal (%rax,%rdi,2), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movl %edi, %eax +; X64-JAG-NEXT: shll $6, %eax +; X64-JAG-NEXT: leal (%rax,%rdi,2), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_66: ; X86-NOOPT: # %bb.0: @@ -1865,27 +1865,27 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_66: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $66, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $66, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_66: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $66, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $66, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_66: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: shll $6, %eax # sched: [1:1.00] -; X64-SLM-NEXT: leal (%rax,%rdi), %eax # sched: [1:1.00] -; X64-SLM-NEXT: addl %edi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movl %edi, %eax +; X64-SLM-NEXT: shll $6, %eax +; X64-SLM-NEXT: leal (%rax,%rdi), %eax +; X64-SLM-NEXT: addl %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_66: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $66, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $66, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 66 ret i32 %mul } @@ -1901,16 +1901,16 @@ ; X64-HSW-LABEL: test_mul_by_73: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rdi,%rax,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax +; X64-HSW-NEXT: leal (%rdi,%rax,8), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_73: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00] -; X64-JAG-NEXT: leal (%rdi,%rax,8), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax +; X64-JAG-NEXT: leal (%rdi,%rax,8), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_73: ; X86-NOOPT: # %bb.0: @@ -1919,23 +1919,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_73: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $73, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $73, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_73: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $73, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $73, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_73: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imull $73, %edi, %eax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imull $73, %edi, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_73: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $73, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $73, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 73 ret i32 %mul } @@ -1952,18 +1952,18 @@ ; X64-HSW-LABEL: test_mul_by_520: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: shll $9, %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rax,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movl %edi, %eax +; X64-HSW-NEXT: shll $9, %eax +; X64-HSW-NEXT: leal (%rax,%rdi,8), %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_520: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: shll $9, %eax # sched: [1:0.50] -; X64-JAG-NEXT: leal (%rax,%rdi,8), %eax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movl %edi, %eax +; X64-JAG-NEXT: shll $9, %eax +; X64-JAG-NEXT: leal (%rax,%rdi,8), %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_520: ; X86-NOOPT: # %bb.0: @@ -1973,26 +1973,22 @@ ; HSW-NOOPT-LABEL: test_mul_by_520: ; HSW-NOOPT: # %bb.0: ; HSW-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208 -; HSW-NOOPT-NEXT: # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_520: ; JAG-NOOPT: # %bb.0: ; JAG-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208 -; JAG-NOOPT-NEXT: # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_520: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: imull $520, %edi, %eax # imm = 0x208 -; X64-SLM-NEXT: # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_520: ; SLM-NOOPT: # %bb.0: ; SLM-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208 -; SLM-NOOPT-NEXT: # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 520 ret i32 %mul } @@ -2009,18 +2005,18 @@ ; X64-HSW-LABEL: test_mul_by_neg10: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] -; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: negl %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: addl %edi, %edi +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax +; X64-HSW-NEXT: negl %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_neg10: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50] -; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: negl %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: addl %edi, %edi +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax +; X64-JAG-NEXT: negl %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_neg10: ; X86-NOOPT: # %bb.0: @@ -2029,26 +2025,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_neg10: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $-10, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $-10, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_neg10: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $-10, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $-10, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_neg10: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50] -; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] -; X64-SLM-NEXT: negl %eax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: addl %edi, %edi +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax +; X64-SLM-NEXT: negl %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_neg10: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $-10, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $-10, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, -10 ret i32 %mul } @@ -2065,18 +2061,18 @@ ; X64-HSW-LABEL: test_mul_by_neg36: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: negl %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: shll $2, %edi +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax +; X64-HSW-NEXT: negl %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_neg36: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50] -; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00] -; X64-JAG-NEXT: negl %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: shll $2, %edi +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax +; X64-JAG-NEXT: negl %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_neg36: ; X86-NOOPT: # %bb.0: @@ -2085,26 +2081,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_neg36: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $-36, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $-36, %edi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_neg36: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $-36, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $-36, %edi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_neg36: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00] -; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] -; X64-SLM-NEXT: negl %eax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: shll $2, %edi +; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax +; X64-SLM-NEXT: negl %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_neg36: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $-36, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $-36, %edi, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, -36 ret i32 %mul } @@ -2122,20 +2118,20 @@ ; X64-HSW-LABEL: test_mul_spec: ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50] -; X64-HSW-NEXT: addl $42, %ecx # sched: [1:0.25] -; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: addl $2, %eax # sched: [1:0.25] -; X64-HSW-NEXT: imull %ecx, %eax # sched: [3:1.00] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %ecx +; X64-HSW-NEXT: addl $42, %ecx +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax +; X64-HSW-NEXT: addl $2, %eax +; X64-HSW-NEXT: imull %ecx, %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_spec: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [2:1.00] -; X64-JAG-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [2:1.00] -; X64-JAG-NEXT: imull %ecx, %eax # sched: [3:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal 42(%rdi,%rdi,8), %ecx +; X64-JAG-NEXT: leal 2(%rdi,%rdi,4), %eax +; X64-JAG-NEXT: imull %ecx, %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_spec: ; X86-NOOPT: # %bb.0: @@ -2148,36 +2144,36 @@ ; HSW-NOOPT-LABEL: test_mul_spec: ; HSW-NOOPT: # %bb.0: ; HSW-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50] -; HSW-NOOPT-NEXT: addl $42, %ecx # sched: [1:0.25] -; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: addl $2, %eax # sched: [1:0.25] -; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %ecx +; HSW-NOOPT-NEXT: addl $42, %ecx +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax +; HSW-NOOPT-NEXT: addl $2, %eax +; HSW-NOOPT-NEXT: imull %ecx, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_spec: ; JAG-NOOPT: # %bb.0: ; JAG-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; JAG-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [2:1.00] -; JAG-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [2:1.00] -; JAG-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx +; JAG-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax +; JAG-NOOPT-NEXT: imull %ecx, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_spec: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00] -; X64-SLM-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00] -; X64-SLM-NEXT: imull %ecx, %eax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leal 42(%rdi,%rdi,8), %ecx +; X64-SLM-NEXT: leal 2(%rdi,%rdi,4), %eax +; X64-SLM-NEXT: imull %ecx, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_spec: ; SLM-NOOPT: # %bb.0: ; SLM-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi -; SLM-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00] -; SLM-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00] -; SLM-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx +; SLM-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax +; SLM-NOOPT-NEXT: imull %ecx, %eax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i32 %x, 9 %add = add nsw i32 %mul, 42 %mul2 = mul nsw i32 %x, 5 @@ -2200,19 +2196,19 @@ ; ; X64-HSW-LABEL: mul_neg_fold: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movl %esi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: movl %esi, %eax ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi -; X64-HSW-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50] -; X64-HSW-NEXT: subl %ecx, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %ecx +; X64-HSW-NEXT: subl %ecx, %eax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: mul_neg_fold: ; X64-JAG: # %bb.0: ; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi -; X64-JAG-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [2:1.00] -; X64-JAG-NEXT: movl %esi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: subl %ecx, %eax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %ecx +; X64-JAG-NEXT: movl %esi, %eax +; X64-JAG-NEXT: subl %ecx, %eax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: mul_neg_fold: ; X86-NOOPT: # %bb.0: @@ -2222,29 +2218,29 @@ ; ; HSW-NOOPT-LABEL: mul_neg_fold: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imull $-9, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: addl %esi, %eax # sched: [1:0.25] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imull $-9, %edi, %eax +; HSW-NOOPT-NEXT: addl %esi, %eax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: mul_neg_fold: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imull $-9, %edi, %eax # sched: [3:1.00] -; JAG-NOOPT-NEXT: addl %esi, %eax # sched: [1:0.50] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imull $-9, %edi, %eax +; JAG-NOOPT-NEXT: addl %esi, %eax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: mul_neg_fold: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi -; X64-SLM-NEXT: movl %esi, %eax # sched: [1:0.50] -; X64-SLM-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:1.00] -; X64-SLM-NEXT: subl %ecx, %eax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movl %esi, %eax +; X64-SLM-NEXT: leal (%rdi,%rdi,8), %ecx +; X64-SLM-NEXT: subl %ecx, %eax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: mul_neg_fold: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imull $-9, %edi, %eax # sched: [3:1.00] -; SLM-NOOPT-NEXT: addl %esi, %eax # sched: [1:0.50] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imull $-9, %edi, %eax +; SLM-NOOPT-NEXT: addl %esi, %eax +; SLM-NOOPT-NEXT: retq %c = mul i32 %a, -9 %d = add i32 %b, %c ret i32 %d Index: test/CodeGen/X86/mul-constant-i64.ll =================================================================== --- test/CodeGen/X86/mul-constant-i64.ll +++ test/CodeGen/X86/mul-constant-i64.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW -; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell | FileCheck %s --check-prefix=X64-HSW +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=btver2 | FileCheck %s --check-prefix=X64-JAG ; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT -; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT -; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT -; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM -; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -mcpu=haswell | FileCheck %s --check-prefix=HSW-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -mcpu=btver2 | FileCheck %s --check-prefix=JAG-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=slm | FileCheck %s --check-prefix=X64-SLM +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -mcpu=slm | FileCheck %s --check-prefix=SLM-NOOPT define i64 @test_mul_by_1(i64 %x) nounwind { ; X86-LABEL: test_mul_by_1: @@ -17,13 +17,13 @@ ; ; X64-HSW-LABEL: test_mul_by_1: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movq %rdi, %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_1: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movq %rdi, %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_1: ; X86-NOOPT: # %bb.0: @@ -33,23 +33,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_1: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: movq %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_1: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: movq %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_1: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movq %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_1: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: movq %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 1 ret i64 %mul } @@ -65,13 +65,13 @@ ; ; X64-HSW-LABEL: test_mul_by_2: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_2: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_2: ; X86-NOOPT: # %bb.0: @@ -83,23 +83,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_2: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_2: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi), %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_2: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leaq (%rdi,%rdi), %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_2: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi), %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 2 ret i64 %mul } @@ -116,13 +116,13 @@ ; ; X64-HSW-LABEL: test_mul_by_3: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_3: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_3: ; X86-NOOPT: # %bb.0: @@ -134,23 +134,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_3: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_3: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_3: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_3: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 3 ret i64 %mul } @@ -166,13 +166,13 @@ ; ; X64-HSW-LABEL: test_mul_by_4: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (,%rdi,4), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_4: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (,%rdi,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (,%rdi,4), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_4: ; X86-NOOPT: # %bb.0: @@ -184,23 +184,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_4: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_4: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [2:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: leaq (,%rdi,4), %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_4: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leaq (,%rdi,4), %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_4: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: leaq (,%rdi,4), %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 4 ret i64 %mul } @@ -217,13 +217,13 @@ ; ; X64-HSW-LABEL: test_mul_by_5: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_5: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_5: ; X86-NOOPT: # %bb.0: @@ -235,23 +235,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_5: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_5: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_5: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_5: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 5 ret i64 %mul } @@ -268,15 +268,15 @@ ; ; X64-HSW-LABEL: test_mul_by_6: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] -; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: addq %rdi, %rdi +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_6: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50] -; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: addq %rdi, %rdi +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_6: ; X86-NOOPT: # %bb.0: @@ -288,24 +288,24 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_6: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_6: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $6, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_6: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50] -; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: addq %rdi, %rdi +; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_6: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $6, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 6 ret i64 %mul } @@ -323,15 +323,15 @@ ; ; X64-HSW-LABEL: test_mul_by_7: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (,%rdi,8), %rax +; X64-HSW-NEXT: subq %rdi, %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_7: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [2:1.00] -; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (,%rdi,8), %rax +; X64-JAG-NEXT: subq %rdi, %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_7: ; X86-NOOPT: # %bb.0: @@ -343,24 +343,24 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_7: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_7: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $7, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_7: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00] -; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leaq (,%rdi,8), %rax +; X64-SLM-NEXT: subq %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_7: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $7, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 7 ret i64 %mul } @@ -376,13 +376,13 @@ ; ; X64-HSW-LABEL: test_mul_by_8: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (,%rdi,8), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_8: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (,%rdi,8), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_8: ; X86-NOOPT: # %bb.0: @@ -394,23 +394,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_8: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_8: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [2:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: leaq (,%rdi,8), %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_8: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leaq (,%rdi,8), %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_8: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: leaq (,%rdi,8), %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 8 ret i64 %mul } @@ -427,13 +427,13 @@ ; ; X64-HSW-LABEL: test_mul_by_9: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_9: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_9: ; X86-NOOPT: # %bb.0: @@ -445,23 +445,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_9: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_9: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_9: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_9: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 9 ret i64 %mul } @@ -478,15 +478,15 @@ ; ; X64-HSW-LABEL: test_mul_by_10: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] -; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: addq %rdi, %rdi +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_10: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50] -; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: addq %rdi, %rdi +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_10: ; X86-NOOPT: # %bb.0: @@ -498,24 +498,24 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_10: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_10: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $10, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_10: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50] -; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: addq %rdi, %rdi +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_10: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $10, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 10 ret i64 %mul } @@ -533,15 +533,15 @@ ; ; X64-HSW-LABEL: test_mul_by_11: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_11: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: leaq (%rdi,%rax,2), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-JAG-NEXT: leaq (%rdi,%rax,2), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_11: ; X86-NOOPT: # %bb.0: @@ -553,23 +553,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_11: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_11: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $11, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_11: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imulq $11, %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_11: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $11, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 11 ret i64 %mul } @@ -586,15 +586,15 @@ ; ; X64-HSW-LABEL: test_mul_by_12: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: shlq $2, %rdi +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_12: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50] -; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: shlq $2, %rdi +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_12: ; X86-NOOPT: # %bb.0: @@ -606,24 +606,24 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_12: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_12: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $12, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_12: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00] -; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: shlq $2, %rdi +; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_12: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $12, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 12 ret i64 %mul } @@ -641,15 +641,15 @@ ; ; X64-HSW-LABEL: test_mul_by_13: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_13: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00] -; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_13: ; X86-NOOPT: # %bb.0: @@ -661,23 +661,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_13: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_13: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $13, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_13: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imulq $13, %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_13: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $13, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 13 ret i64 %mul } @@ -697,19 +697,19 @@ ; ; X64-HSW-LABEL: test_mul_by_14: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50] -; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movq %rdi, %rax +; X64-HSW-NEXT: shlq $4, %rax +; X64-HSW-NEXT: subq %rdi, %rax +; X64-HSW-NEXT: subq %rdi, %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_14: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50] -; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movq %rdi, %rax +; X64-JAG-NEXT: shlq $4, %rax +; X64-JAG-NEXT: subq %rdi, %rax +; X64-JAG-NEXT: subq %rdi, %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_14: ; X86-NOOPT: # %bb.0: @@ -721,26 +721,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_14: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_14: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $14, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_14: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00] -; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movq %rdi, %rax +; X64-SLM-NEXT: shlq $4, %rax +; X64-SLM-NEXT: subq %rdi, %rax +; X64-SLM-NEXT: subq %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_14: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $14, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 14 ret i64 %mul } @@ -758,15 +758,15 @@ ; ; X64-HSW-LABEL: test_mul_by_15: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_15: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_15: ; X86-NOOPT: # %bb.0: @@ -778,24 +778,24 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_15: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_15: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $15, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_15: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] -; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_15: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $15, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 15 ret i64 %mul } @@ -811,15 +811,15 @@ ; ; X64-HSW-LABEL: test_mul_by_16: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movq %rdi, %rax +; X64-HSW-NEXT: shlq $4, %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movq %rdi, %rax +; X64-JAG-NEXT: shlq $4, %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_16: ; X86-NOOPT: # %bb.0: @@ -831,27 +831,27 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_16: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HSW-NOOPT-NEXT: shlq $4, %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: movq %rdi, %rax +; HSW-NOOPT-NEXT: shlq $4, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] -; JAG-NOOPT-NEXT: shlq $4, %rax # sched: [1:0.50] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: movq %rdi, %rax +; JAG-NOOPT-NEXT: shlq $4, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_16: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movq %rdi, %rax +; X64-SLM-NEXT: shlq $4, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_16: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] -; SLM-NOOPT-NEXT: shlq $4, %rax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: movq %rdi, %rax +; SLM-NOOPT-NEXT: shlq $4, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 16 ret i64 %mul } @@ -870,17 +870,17 @@ ; ; X64-HSW-LABEL: test_mul_by_17: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movq %rdi, %rax +; X64-HSW-NEXT: shlq $4, %rax +; X64-HSW-NEXT: leaq (%rax,%rdi), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_17: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50] -; X64-JAG-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movq %rdi, %rax +; X64-JAG-NEXT: shlq $4, %rax +; X64-JAG-NEXT: leaq (%rax,%rdi), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_17: ; X86-NOOPT: # %bb.0: @@ -892,25 +892,25 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_17: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_17: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $17, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_17: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00] -; X64-SLM-NEXT: addq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movq %rdi, %rax +; X64-SLM-NEXT: shlq $4, %rax +; X64-SLM-NEXT: addq %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_17: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $17, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 17 ret i64 %mul } @@ -927,15 +927,15 @@ ; ; X64-HSW-LABEL: test_mul_by_18: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] -; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: addq %rdi, %rdi +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_18: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50] -; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: addq %rdi, %rdi +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_18: ; X86-NOOPT: # %bb.0: @@ -947,24 +947,24 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_18: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_18: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $18, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_18: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50] -; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: addq %rdi, %rdi +; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_18: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $18, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 18 ret i64 %mul } @@ -982,15 +982,15 @@ ; ; X64-HSW-LABEL: test_mul_by_19: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_19: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00] -; X64-JAG-NEXT: leaq (%rdi,%rax,2), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-JAG-NEXT: leaq (%rdi,%rax,2), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_19: ; X86-NOOPT: # %bb.0: @@ -1002,23 +1002,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_19: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_19: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $19, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_19: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imulq $19, %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_19: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $19, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 19 ret i64 %mul } @@ -1035,15 +1035,15 @@ ; ; X64-HSW-LABEL: test_mul_by_20: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: shlq $2, %rdi +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_20: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50] -; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: shlq $2, %rdi +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_20: ; X86-NOOPT: # %bb.0: @@ -1055,24 +1055,24 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_20: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_20: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $20, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_20: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00] -; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: shlq $2, %rdi +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_20: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $20, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 20 ret i64 %mul } @@ -1090,15 +1090,15 @@ ; ; X64-HSW-LABEL: test_mul_by_21: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_21: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_21: ; X86-NOOPT: # %bb.0: @@ -1110,23 +1110,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_21: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_21: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $21, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_21: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imulq $21, %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_21: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $21, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 21 ret i64 %mul } @@ -1145,17 +1145,17 @@ ; ; X64-HSW-LABEL: test_mul_by_22: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax +; X64-HSW-NEXT: addq %rdi, %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_22: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax +; X64-JAG-NEXT: addq %rdi, %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_22: ; X86-NOOPT: # %bb.0: @@ -1167,23 +1167,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_22: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_22: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $22, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_22: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imulq $22, %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_22: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $22, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 22 ret i64 %mul } @@ -1202,17 +1202,17 @@ ; ; X64-HSW-LABEL: test_mul_by_23: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: shlq $3, %rax # sched: [1:0.50] -; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-HSW-NEXT: shlq $3, %rax +; X64-HSW-NEXT: subq %rdi, %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_23: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00] -; X64-JAG-NEXT: shlq $3, %rax # sched: [1:0.50] -; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-JAG-NEXT: shlq $3, %rax +; X64-JAG-NEXT: subq %rdi, %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_23: ; X86-NOOPT: # %bb.0: @@ -1224,23 +1224,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_23: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_23: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $23, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_23: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imulq $23, %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_23: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $23, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 23 ret i64 %mul } @@ -1257,15 +1257,15 @@ ; ; X64-HSW-LABEL: test_mul_by_24: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: shlq $3, %rdi # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: shlq $3, %rdi +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_24: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: shlq $3, %rdi # sched: [1:0.50] -; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: shlq $3, %rdi +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_24: ; X86-NOOPT: # %bb.0: @@ -1277,24 +1277,24 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_24: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_24: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $24, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_24: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: shlq $3, %rdi # sched: [1:1.00] -; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: shlq $3, %rdi +; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_24: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $24, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 24 ret i64 %mul } @@ -1312,15 +1312,15 @@ ; ; X64-HSW-LABEL: test_mul_by_25: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_25: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: leaq (%rax,%rax,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-JAG-NEXT: leaq (%rax,%rax,4), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_25: ; X86-NOOPT: # %bb.0: @@ -1332,24 +1332,24 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_25: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_25: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $25, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_25: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] -; X64-SLM-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-SLM-NEXT: leaq (%rax,%rax,4), %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_25: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $25, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 25 ret i64 %mul } @@ -1368,17 +1368,17 @@ ; ; X64-HSW-LABEL: test_mul_by_26: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax +; X64-HSW-NEXT: addq %rdi, %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_26: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: leaq (%rax,%rax,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-JAG-NEXT: leaq (%rax,%rax,4), %rax +; X64-JAG-NEXT: addq %rdi, %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_26: ; X86-NOOPT: # %bb.0: @@ -1390,23 +1390,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_26: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_26: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $26, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_26: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imulq $26, %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_26: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $26, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 26 ret i64 %mul } @@ -1424,15 +1424,15 @@ ; ; X64-HSW-LABEL: test_mul_by_27: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_27: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00] -; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_27: ; X86-NOOPT: # %bb.0: @@ -1444,24 +1444,24 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_27: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_27: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $27, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_27: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] -; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_27: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $27, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 27 ret i64 %mul } @@ -1480,17 +1480,17 @@ ; ; X64-HSW-LABEL: test_mul_by_28: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax +; X64-HSW-NEXT: addq %rdi, %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_28: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00] -; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [2:1.00] -; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax +; X64-JAG-NEXT: addq %rdi, %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_28: ; X86-NOOPT: # %bb.0: @@ -1502,23 +1502,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_28: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_28: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $28, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_28: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imulq $28, %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_28: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $28, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 28 ret i64 %mul } @@ -1538,19 +1538,19 @@ ; ; X64-HSW-LABEL: test_mul_by_29: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax +; X64-HSW-NEXT: addq %rdi, %rax +; X64-HSW-NEXT: addq %rdi, %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_29: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00] -; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [2:1.00] -; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax +; X64-JAG-NEXT: addq %rdi, %rax +; X64-JAG-NEXT: addq %rdi, %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_29: ; X86-NOOPT: # %bb.0: @@ -1562,23 +1562,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_29: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_29: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $29, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_29: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imulq $29, %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_29: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $29, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 29 ret i64 %mul } @@ -1598,19 +1598,19 @@ ; ; X64-HSW-LABEL: test_mul_by_30: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] -; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movq %rdi, %rax +; X64-HSW-NEXT: shlq $5, %rax +; X64-HSW-NEXT: subq %rdi, %rax +; X64-HSW-NEXT: subq %rdi, %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_30: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50] -; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movq %rdi, %rax +; X64-JAG-NEXT: shlq $5, %rax +; X64-JAG-NEXT: subq %rdi, %rax +; X64-JAG-NEXT: subq %rdi, %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_30: ; X86-NOOPT: # %bb.0: @@ -1622,26 +1622,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_30: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_30: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $30, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_30: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00] -; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movq %rdi, %rax +; X64-SLM-NEXT: shlq $5, %rax +; X64-SLM-NEXT: subq %rdi, %rax +; X64-SLM-NEXT: subq %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_30: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $30, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 30 ret i64 %mul } @@ -1660,17 +1660,17 @@ ; ; X64-HSW-LABEL: test_mul_by_31: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] -; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movq %rdi, %rax +; X64-HSW-NEXT: shlq $5, %rax +; X64-HSW-NEXT: subq %rdi, %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_31: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50] -; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movq %rdi, %rax +; X64-JAG-NEXT: shlq $5, %rax +; X64-JAG-NEXT: subq %rdi, %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_31: ; X86-NOOPT: # %bb.0: @@ -1682,25 +1682,25 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_31: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_31: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $31, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_31: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00] -; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movq %rdi, %rax +; X64-SLM-NEXT: shlq $5, %rax +; X64-SLM-NEXT: subq %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_31: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $31, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 31 ret i64 %mul } @@ -1716,15 +1716,15 @@ ; ; X64-HSW-LABEL: test_mul_by_32: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movq %rdi, %rax +; X64-HSW-NEXT: shlq $5, %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movq %rdi, %rax +; X64-JAG-NEXT: shlq $5, %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_32: ; X86-NOOPT: # %bb.0: @@ -1736,27 +1736,27 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_32: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HSW-NOOPT-NEXT: shlq $5, %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: movq %rdi, %rax +; HSW-NOOPT-NEXT: shlq $5, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] -; JAG-NOOPT-NEXT: shlq $5, %rax # sched: [1:0.50] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: movq %rdi, %rax +; JAG-NOOPT-NEXT: shlq $5, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_32: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movq %rdi, %rax +; X64-SLM-NEXT: shlq $5, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_32: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] -; SLM-NOOPT-NEXT: shlq $5, %rax # sched: [1:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: movq %rdi, %rax +; SLM-NOOPT-NEXT: shlq $5, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 32 ret i64 %mul } @@ -1774,15 +1774,15 @@ ; ; X64-HSW-LABEL: test_mul_by_37: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_37: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00] -; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_37: ; X86-NOOPT: # %bb.0: @@ -1794,23 +1794,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_37: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $37, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $37, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_37: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $37, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $37, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_37: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imulq $37, %rdi, %rax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imulq $37, %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_37: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $37, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $37, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 37 ret i64 %mul } @@ -1828,15 +1828,15 @@ ; ; X64-HSW-LABEL: test_mul_by_41: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rdi,%rax,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-HSW-NEXT: leaq (%rdi,%rax,8), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_41: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: leaq (%rdi,%rax,8), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-JAG-NEXT: leaq (%rdi,%rax,8), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_41: ; X86-NOOPT: # %bb.0: @@ -1848,23 +1848,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_41: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $41, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $41, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_41: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $41, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $41, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_41: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imulq $41, %rdi, %rax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imulq $41, %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_41: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $41, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $41, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 41 ret i64 %mul } @@ -1884,19 +1884,19 @@ ; ; X64-HSW-LABEL: test_mul_by_62: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: shlq $6, %rax # sched: [1:0.50] -; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movq %rdi, %rax +; X64-HSW-NEXT: shlq $6, %rax +; X64-HSW-NEXT: subq %rdi, %rax +; X64-HSW-NEXT: subq %rdi, %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_62: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: shlq $6, %rax # sched: [1:0.50] -; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movq %rdi, %rax +; X64-JAG-NEXT: shlq $6, %rax +; X64-JAG-NEXT: subq %rdi, %rax +; X64-JAG-NEXT: subq %rdi, %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_62: ; X86-NOOPT: # %bb.0: @@ -1908,26 +1908,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_62: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $62, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $62, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_62: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $62, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $62, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_62: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: shlq $6, %rax # sched: [1:1.00] -; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movq %rdi, %rax +; X64-SLM-NEXT: shlq $6, %rax +; X64-SLM-NEXT: subq %rdi, %rax +; X64-SLM-NEXT: subq %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_62: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $62, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $62, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 62 ret i64 %mul } @@ -1946,17 +1946,17 @@ ; ; X64-HSW-LABEL: test_mul_by_66: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: shlq $6, %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movq %rdi, %rax +; X64-HSW-NEXT: shlq $6, %rax +; X64-HSW-NEXT: leaq (%rax,%rdi,2), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_66: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: shlq $6, %rax # sched: [1:0.50] -; X64-JAG-NEXT: leaq (%rax,%rdi,2), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movq %rdi, %rax +; X64-JAG-NEXT: shlq $6, %rax +; X64-JAG-NEXT: leaq (%rax,%rdi,2), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_66: ; X86-NOOPT: # %bb.0: @@ -1968,26 +1968,26 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_66: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $66, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $66, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_66: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $66, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $66, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_66: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: shlq $6, %rax # sched: [1:1.00] -; X64-SLM-NEXT: addq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: addq %rdi, %rax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: movq %rdi, %rax +; X64-SLM-NEXT: shlq $6, %rax +; X64-SLM-NEXT: addq %rdi, %rax +; X64-SLM-NEXT: addq %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_66: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $66, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $66, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 66 ret i64 %mul } @@ -2005,15 +2005,15 @@ ; ; X64-HSW-LABEL: test_mul_by_73: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rdi,%rax,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-HSW-NEXT: leaq (%rdi,%rax,8), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_73: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00] -; X64-JAG-NEXT: leaq (%rdi,%rax,8), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-JAG-NEXT: leaq (%rdi,%rax,8), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_73: ; X86-NOOPT: # %bb.0: @@ -2025,23 +2025,23 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_73: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $73, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $73, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_73: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $73, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $73, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_73: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: imulq $73, %rdi, %rax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: imulq $73, %rdi, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_73: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $73, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $73, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 73 ret i64 %mul } @@ -2060,17 +2060,17 @@ ; ; X64-HSW-LABEL: test_mul_by_520: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: shlq $9, %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rax,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: movq %rdi, %rax +; X64-HSW-NEXT: shlq $9, %rax +; X64-HSW-NEXT: leaq (%rax,%rdi,8), %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_520: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: shlq $9, %rax # sched: [1:0.50] -; X64-JAG-NEXT: leaq (%rax,%rdi,8), %rax # sched: [2:1.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: movq %rdi, %rax +; X64-JAG-NEXT: shlq $9, %rax +; X64-JAG-NEXT: leaq (%rax,%rdi,8), %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_520: ; X86-NOOPT: # %bb.0: @@ -2083,26 +2083,22 @@ ; HSW-NOOPT-LABEL: test_mul_by_520: ; HSW-NOOPT: # %bb.0: ; HSW-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208 -; HSW-NOOPT-NEXT: # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_520: ; JAG-NOOPT: # %bb.0: ; JAG-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208 -; JAG-NOOPT-NEXT: # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_520: ; X64-SLM: # %bb.0: ; X64-SLM-NEXT: imulq $520, %rdi, %rax # imm = 0x208 -; X64-SLM-NEXT: # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_520: ; SLM-NOOPT: # %bb.0: ; SLM-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208 -; SLM-NOOPT-NEXT: # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 520 ret i64 %mul } @@ -2128,17 +2124,17 @@ ; ; X64-HSW-LABEL: test_mul_by_neg10: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] -; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: negq %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: addq %rdi, %rdi +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-HSW-NEXT: negq %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_neg10: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50] -; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: negq %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: addq %rdi, %rdi +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-JAG-NEXT: negq %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_neg10: ; X86-NOOPT: # %bb.0: @@ -2153,25 +2149,25 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_neg10: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $-10, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $-10, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_neg10: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $-10, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $-10, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_neg10: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50] -; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] -; X64-SLM-NEXT: negq %rax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: addq %rdi, %rdi +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-SLM-NEXT: negq %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_neg10: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $-10, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $-10, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, -10 ret i64 %mul } @@ -2197,17 +2193,17 @@ ; ; X64-HSW-LABEL: test_mul_by_neg36: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: negq %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: shlq $2, %rdi +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-HSW-NEXT: negq %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_by_neg36: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50] -; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00] -; X64-JAG-NEXT: negq %rax # sched: [1:0.50] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: shlq $2, %rdi +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-JAG-NEXT: negq %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_by_neg36: ; X86-NOOPT: # %bb.0: @@ -2222,25 +2218,25 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_neg36: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: imulq $-36, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: imulq $-36, %rdi, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_by_neg36: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: imulq $-36, %rdi, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: imulq $-36, %rdi, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_by_neg36: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00] -; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] -; X64-SLM-NEXT: negq %rax # sched: [1:0.50] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: shlq $2, %rdi +; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax +; X64-SLM-NEXT: negq %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_by_neg36: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: imulq $-36, %rdi, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: imulq $-36, %rdi, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, -36 ret i64 %mul } @@ -2281,19 +2277,19 @@ ; ; X64-HSW-LABEL: test_mul_spec: ; X64-HSW: # %bb.0: -; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50] -; X64-HSW-NEXT: addq $42, %rcx # sched: [1:0.25] -; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: addq $2, %rax # sched: [1:0.25] -; X64-HSW-NEXT: imulq %rcx, %rax # sched: [3:1.00] -; X64-HSW-NEXT: retq # sched: [7:1.00] +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rcx +; X64-HSW-NEXT: addq $42, %rcx +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax +; X64-HSW-NEXT: addq $2, %rax +; X64-HSW-NEXT: imulq %rcx, %rax +; X64-HSW-NEXT: retq ; ; X64-JAG-LABEL: test_mul_spec: ; X64-JAG: # %bb.0: -; X64-JAG-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [2:1.00] -; X64-JAG-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [2:1.00] -; X64-JAG-NEXT: imulq %rcx, %rax # sched: [6:4.00] -; X64-JAG-NEXT: retq # sched: [4:1.00] +; X64-JAG-NEXT: leaq 42(%rdi,%rdi,8), %rcx +; X64-JAG-NEXT: leaq 2(%rdi,%rdi,4), %rax +; X64-JAG-NEXT: imulq %rcx, %rax +; X64-JAG-NEXT: retq ; ; X86-NOOPT-LABEL: test_mul_spec: ; X86-NOOPT: # %bb.0: @@ -2329,33 +2325,33 @@ ; ; HSW-NOOPT-LABEL: test_mul_spec: ; HSW-NOOPT: # %bb.0: -; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50] -; HSW-NOOPT-NEXT: addq $42, %rcx # sched: [1:0.25] -; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: addq $2, %rax # sched: [1:0.25] -; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [7:1.00] +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rcx +; HSW-NOOPT-NEXT: addq $42, %rcx +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax +; HSW-NOOPT-NEXT: addq $2, %rax +; HSW-NOOPT-NEXT: imulq %rcx, %rax +; HSW-NOOPT-NEXT: retq ; ; JAG-NOOPT-LABEL: test_mul_spec: ; JAG-NOOPT: # %bb.0: -; JAG-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [2:1.00] -; JAG-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [2:1.00] -; JAG-NOOPT-NEXT: imulq %rcx, %rax # sched: [6:4.00] -; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; JAG-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx +; JAG-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax +; JAG-NOOPT-NEXT: imulq %rcx, %rax +; JAG-NOOPT-NEXT: retq ; ; X64-SLM-LABEL: test_mul_spec: ; X64-SLM: # %bb.0: -; X64-SLM-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00] -; X64-SLM-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00] -; X64-SLM-NEXT: imulq %rcx, %rax # sched: [3:1.00] -; X64-SLM-NEXT: retq # sched: [4:1.00] +; X64-SLM-NEXT: leaq 42(%rdi,%rdi,8), %rcx +; X64-SLM-NEXT: leaq 2(%rdi,%rdi,4), %rax +; X64-SLM-NEXT: imulq %rcx, %rax +; X64-SLM-NEXT: retq ; ; SLM-NOOPT-LABEL: test_mul_spec: ; SLM-NOOPT: # %bb.0: -; SLM-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00] -; SLM-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00] -; SLM-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00] -; SLM-NOOPT-NEXT: retq # sched: [4:1.00] +; SLM-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx +; SLM-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax +; SLM-NOOPT-NEXT: imulq %rcx, %rax +; SLM-NOOPT-NEXT: retq %mul = mul nsw i64 %x, 9 %add = add nsw i64 %mul, 42 %mul2 = mul nsw i64 %x, 5 Index: test/CodeGen/X86/mwaitx-schedule.ll =================================================================== --- test/CodeGen/X86/mwaitx-schedule.ll +++ test/CodeGen/X86/mwaitx-schedule.ll @@ -1,65 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+mwaitx | FileCheck %s --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER4 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1 - -define void @foo(i8* %P, i32 %E, i32 %H) nounwind { -; GENERIC-LABEL: foo: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; GENERIC-NEXT: monitorx # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER4-LABEL: foo: -; BDVER4: # %bb.0: -; BDVER4-NEXT: movl %esi, %ecx -; BDVER4-NEXT: leaq (%rdi), %rax -; BDVER4-NEXT: monitorx -; BDVER4-NEXT: retq -; -; ZNVER1-LABEL: foo: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25] -; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] -; ZNVER1-NEXT: monitorx # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void @llvm.x86.monitorx(i8* %P, i32 %E, i32 %H) - ret void -} -declare void @llvm.x86.monitorx(i8*, i32, i32) nounwind - -define void @bar(i32 %E, i32 %H, i32 %C) nounwind { -; GENERIC-LABEL: bar: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pushq %rbx # sched: [5:1.00] -; GENERIC-NEXT: movl %edx, %ebx # sched: [1:0.33] -; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33] -; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: mwaitx # sched: [100:0.33] -; GENERIC-NEXT: popq %rbx # sched: [6:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER4-LABEL: bar: -; BDVER4: # %bb.0: -; BDVER4-NEXT: pushq %rbx -; BDVER4-NEXT: movl %edx, %ebx -; BDVER4-NEXT: movl %esi, %eax -; BDVER4-NEXT: movl %edi, %ecx -; BDVER4-NEXT: mwaitx -; BDVER4-NEXT: popq %rbx -; BDVER4-NEXT: retq -; -; ZNVER1-LABEL: bar: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pushq %rbx # sched: [1:0.50] -; ZNVER1-NEXT: movl %edx, %ebx # sched: [1:0.25] -; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25] -; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] -; ZNVER1-NEXT: mwaitx # sched: [100:0.25] -; ZNVER1-NEXT: popq %rbx # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void @llvm.x86.mwaitx(i32 %E, i32 %H, i32 %C) - ret void -} -declare void @llvm.x86.mwaitx(i32, i32, i32) nounwind Index: test/CodeGen/X86/popcnt-schedule.ll =================================================================== --- test/CodeGen/X86/popcnt-schedule.ll +++ test/CodeGen/X86/popcnt-schedule.ll @@ -1,235 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+popcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define i16 @test_ctpop_i16(i16 zeroext %a0, i16 *%a1) { -; GENERIC-LABEL: test_ctpop_i16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: popcntw (%rsi), %cx # sched: [9:1.00] -; GENERIC-NEXT: popcntw %di, %ax # sched: [3:1.00] -; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_ctpop_i16: -; SLM: # %bb.0: -; SLM-NEXT: popcntw (%rsi), %cx # sched: [6:1.00] -; SLM-NEXT: popcntw %di, %ax # sched: [3:1.00] -; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: # kill: def $ax killed $ax killed $eax -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_ctpop_i16: -; SANDY: # %bb.0: -; SANDY-NEXT: popcntw (%rsi), %cx # sched: [9:1.00] -; SANDY-NEXT: popcntw %di, %ax # sched: [3:1.00] -; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: # kill: def $ax killed $ax killed $eax -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_ctpop_i16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: popcntw (%rsi), %cx # sched: [8:1.00] -; HASWELL-NEXT: popcntw %di, %ax # sched: [3:1.00] -; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ctpop_i16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: popcntw (%rsi), %cx # sched: [8:1.00] -; BROADWELL-NEXT: popcntw %di, %ax # sched: [3:1.00] -; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_ctpop_i16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: popcntw (%rsi), %cx # sched: [8:1.00] -; SKYLAKE-NEXT: popcntw %di, %ax # sched: [3:1.00] -; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_ctpop_i16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: popcntw (%rsi), %cx # sched: [8:0.50] -; BDVER2-NEXT: popcntw %di, %ax # sched: [4:0.50] -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_ctpop_i16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: popcntw (%rsi), %cx # sched: [4:1.00] -; BTVER2-NEXT: popcntw %di, %ax # sched: [1:0.50] -; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ctpop_i16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: popcntw (%rsi), %cx # sched: [5:0.50] -; ZNVER1-NEXT: popcntw %di, %ax # sched: [1:0.25] -; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: # kill: def $ax killed $ax killed $eax -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i16, i16 *%a1 - %2 = tail call i16 @llvm.ctpop.i16( i16 %1 ) - %3 = tail call i16 @llvm.ctpop.i16( i16 %a0 ) - %4 = or i16 %2, %3 - ret i16 %4 -} -declare i16 @llvm.ctpop.i16(i16) - -define i32 @test_ctpop_i32(i32 %a0, i32 *%a1) { -; GENERIC-LABEL: test_ctpop_i32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: popcntl (%rsi), %ecx # sched: [9:1.00] -; GENERIC-NEXT: popcntl %edi, %eax # sched: [3:1.00] -; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_ctpop_i32: -; SLM: # %bb.0: -; SLM-NEXT: popcntl (%rsi), %ecx # sched: [6:1.00] -; SLM-NEXT: popcntl %edi, %eax # sched: [3:1.00] -; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_ctpop_i32: -; SANDY: # %bb.0: -; SANDY-NEXT: popcntl (%rsi), %ecx # sched: [9:1.00] -; SANDY-NEXT: popcntl %edi, %eax # sched: [3:1.00] -; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_ctpop_i32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: popcntl (%rsi), %ecx # sched: [8:1.00] -; HASWELL-NEXT: popcntl %edi, %eax # sched: [3:1.00] -; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ctpop_i32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: popcntl (%rsi), %ecx # sched: [8:1.00] -; BROADWELL-NEXT: popcntl %edi, %eax # sched: [3:1.00] -; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_ctpop_i32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: popcntl (%rsi), %ecx # sched: [8:1.00] -; SKYLAKE-NEXT: popcntl %edi, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_ctpop_i32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: popcntl (%rsi), %ecx # sched: [8:0.50] -; BDVER2-NEXT: popcntl %edi, %eax # sched: [4:0.50] -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_ctpop_i32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: popcntl (%rsi), %ecx # sched: [4:1.00] -; BTVER2-NEXT: popcntl %edi, %eax # sched: [1:0.50] -; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ctpop_i32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: popcntl (%rsi), %ecx # sched: [5:0.50] -; ZNVER1-NEXT: popcntl %edi, %eax # sched: [1:0.25] -; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i32, i32 *%a1 - %2 = tail call i32 @llvm.ctpop.i32( i32 %1 ) - %3 = tail call i32 @llvm.ctpop.i32( i32 %a0 ) - %4 = or i32 %2, %3 - ret i32 %4 -} -declare i32 @llvm.ctpop.i32(i32) - -define i64 @test_ctpop_i64(i64 %a0, i64 *%a1) { -; GENERIC-LABEL: test_ctpop_i64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: popcntq (%rsi), %rcx # sched: [9:1.00] -; GENERIC-NEXT: popcntq %rdi, %rax # sched: [3:1.00] -; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_ctpop_i64: -; SLM: # %bb.0: -; SLM-NEXT: popcntq (%rsi), %rcx # sched: [6:1.00] -; SLM-NEXT: popcntq %rdi, %rax # sched: [3:1.00] -; SLM-NEXT: orq %rcx, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_ctpop_i64: -; SANDY: # %bb.0: -; SANDY-NEXT: popcntq (%rsi), %rcx # sched: [9:1.00] -; SANDY-NEXT: popcntq %rdi, %rax # sched: [3:1.00] -; SANDY-NEXT: orq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_ctpop_i64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: popcntq (%rsi), %rcx # sched: [8:1.00] -; HASWELL-NEXT: popcntq %rdi, %rax # sched: [3:1.00] -; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ctpop_i64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: popcntq (%rsi), %rcx # sched: [8:1.00] -; BROADWELL-NEXT: popcntq %rdi, %rax # sched: [3:1.00] -; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_ctpop_i64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: popcntq (%rsi), %rcx # sched: [8:1.00] -; SKYLAKE-NEXT: popcntq %rdi, %rax # sched: [3:1.00] -; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_ctpop_i64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: popcntq (%rsi), %rcx # sched: [8:0.50] -; BDVER2-NEXT: popcntq %rdi, %rax # sched: [4:0.50] -; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_ctpop_i64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: popcntq (%rsi), %rcx # sched: [4:1.00] -; BTVER2-NEXT: popcntq %rdi, %rax # sched: [1:0.50] -; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ctpop_i64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: popcntq (%rsi), %rcx # sched: [5:0.50] -; ZNVER1-NEXT: popcntq %rdi, %rax # sched: [1:0.25] -; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64 *%a1 - %2 = tail call i64 @llvm.ctpop.i64( i64 %1 ) - %3 = tail call i64 @llvm.ctpop.i64( i64 %a0 ) - %4 = or i64 %2, %3 - ret i64 %4 -} -declare i64 @llvm.ctpop.i64(i64) Index: test/CodeGen/X86/rdpid-schedule.ll =================================================================== --- test/CodeGen/X86/rdpid-schedule.ll +++ test/CodeGen/X86/rdpid-schedule.ll @@ -1,21 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+rdpid | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=icelake-client | FileCheck %s --check-prefix=CHECK --check-prefix=ICELAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=icelake-server | FileCheck %s --check-prefix=CHECK --check-prefix=ICELAKE - -define i32 @test_rdpid() { -; GENERIC-LABEL: test_rdpid: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rdpid %rax # sched: [100:0.33] -; GENERIC-NEXT: # kill: def $eax killed $eax killed $rax -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ICELAKE-LABEL: test_rdpid: -; ICELAKE: # %bb.0: -; ICELAKE-NEXT: rdpid %rax # sched: [100:0.25] -; ICELAKE-NEXT: # kill: def $eax killed $eax killed $rax -; ICELAKE-NEXT: retq # sched: [7:1.00] - %1 = tail call i32 @llvm.x86.rdpid() - ret i32 %1 -} -declare i32 @llvm.x86.rdpid() Index: test/CodeGen/X86/rdrand-schedule.ll =================================================================== --- test/CodeGen/X86/rdrand-schedule.ll +++ test/CodeGen/X86/rdrand-schedule.ll @@ -1,148 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+rdrnd | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=GOLDMONT -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=IVY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -declare {i16, i32} @llvm.x86.rdrand.16() -declare {i32, i32} @llvm.x86.rdrand.32() -declare {i64, i32} @llvm.x86.rdrand.64() - -define i16 @test_rdrand_16(i16* %random_val) { -; GENERIC-LABEL: test_rdrand_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rdrandw %ax # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GOLDMONT-LABEL: test_rdrand_16: -; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: rdrandw %ax # sched: [100:1.00] -; GOLDMONT-NEXT: retq # sched: [4:1.00] -; -; IVY-LABEL: test_rdrand_16: -; IVY: # %bb.0: -; IVY-NEXT: rdrandw %ax # sched: [100:0.33] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rdrand_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: rdrandw %ax # sched: [1:5.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rdrand_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: rdrandw %ax # sched: [9:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rdrand_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: rdrandw %ax # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rdrand_16: -; SKX: # %bb.0: -; SKX-NEXT: rdrandw %ax # sched: [100:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_rdrand_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: rdrandw %ax # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %call = call {i16, i32} @llvm.x86.rdrand.16() - %randval = extractvalue {i16, i32} %call, 0 - ret i16 %randval -} - -define i32 @test_rdrand_32(i32* %random_val) { -; GENERIC-LABEL: test_rdrand_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rdrandl %eax # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GOLDMONT-LABEL: test_rdrand_32: -; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: rdrandl %eax # sched: [100:1.00] -; GOLDMONT-NEXT: retq # sched: [4:1.00] -; -; IVY-LABEL: test_rdrand_32: -; IVY: # %bb.0: -; IVY-NEXT: rdrandl %eax # sched: [100:0.33] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rdrand_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: rdrandl %eax # sched: [1:5.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rdrand_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: rdrandl %eax # sched: [9:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rdrand_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: rdrandl %eax # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rdrand_32: -; SKX: # %bb.0: -; SKX-NEXT: rdrandl %eax # sched: [100:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_rdrand_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: rdrandl %eax # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %call = call {i32, i32} @llvm.x86.rdrand.32() - %randval = extractvalue {i32, i32} %call, 0 - ret i32 %randval -} - -define i64 @test_rdrand_64(i64* %random_val) { -; GENERIC-LABEL: test_rdrand_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rdrandq %rax # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GOLDMONT-LABEL: test_rdrand_64: -; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: rdrandq %rax # sched: [100:1.00] -; GOLDMONT-NEXT: retq # sched: [4:1.00] -; -; IVY-LABEL: test_rdrand_64: -; IVY: # %bb.0: -; IVY-NEXT: rdrandq %rax # sched: [100:0.33] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rdrand_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: rdrandq %rax # sched: [1:5.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rdrand_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: rdrandq %rax # sched: [9:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rdrand_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: rdrandq %rax # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rdrand_64: -; SKX: # %bb.0: -; SKX-NEXT: rdrandq %rax # sched: [100:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_rdrand_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: rdrandq %rax # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %call = call {i64, i32} @llvm.x86.rdrand.64() - %randval = extractvalue {i64, i32} %call, 0 - ret i64 %randval -} Index: test/CodeGen/X86/rdseed-schedule.ll =================================================================== --- test/CodeGen/X86/rdseed-schedule.ll +++ test/CodeGen/X86/rdseed-schedule.ll @@ -1,116 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+rdseed | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=GOLDMONT -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -declare {i16, i32} @llvm.x86.rdseed.16() -declare {i32, i32} @llvm.x86.rdseed.32() -declare {i64, i32} @llvm.x86.rdseed.64() - -define i16 @test_rdseed_16(i16* %random_val) { -; GENERIC-LABEL: test_rdseed_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rdseedw %ax # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GOLDMONT-LABEL: test_rdseed_16: -; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: rdseedw %ax # sched: [100:1.00] -; GOLDMONT-NEXT: retq # sched: [4:1.00] -; -; BROADWELL-LABEL: test_rdseed_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: rdseedw %ax # sched: [100:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rdseed_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: rdseedw %ax # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rdseed_16: -; SKX: # %bb.0: -; SKX-NEXT: rdseedw %ax # sched: [100:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_rdseed_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: rdseedw %ax # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %call = call {i16, i32} @llvm.x86.rdseed.16() - %randval = extractvalue {i16, i32} %call, 0 - ret i16 %randval -} - -define i32 @test_rdseed_32(i16* %random_val) { -; GENERIC-LABEL: test_rdseed_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rdseedl %eax # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GOLDMONT-LABEL: test_rdseed_32: -; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: rdseedl %eax # sched: [100:1.00] -; GOLDMONT-NEXT: retq # sched: [4:1.00] -; -; BROADWELL-LABEL: test_rdseed_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: rdseedl %eax # sched: [100:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rdseed_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: rdseedl %eax # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rdseed_32: -; SKX: # %bb.0: -; SKX-NEXT: rdseedl %eax # sched: [100:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_rdseed_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: rdseedl %eax # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %call = call {i32, i32} @llvm.x86.rdseed.32() - %randval = extractvalue {i32, i32} %call, 0 - ret i32 %randval -} - -define i64 @test_rdseed_64(i64* %random_val) { -; GENERIC-LABEL: test_rdseed_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rdseedq %rax # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GOLDMONT-LABEL: test_rdseed_64: -; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: rdseedq %rax # sched: [100:1.00] -; GOLDMONT-NEXT: retq # sched: [4:1.00] -; -; BROADWELL-LABEL: test_rdseed_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: rdseedq %rax # sched: [100:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rdseed_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: rdseedq %rax # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rdseed_64: -; SKX: # %bb.0: -; SKX-NEXT: rdseedq %rax # sched: [100:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_rdseed_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: rdseedq %rax # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %call = call {i64, i32} @llvm.x86.rdseed.64() - %randval = extractvalue {i64, i32} %call, 0 - ret i64 %randval -} Index: test/CodeGen/X86/recip-fastmath.ll =================================================================== --- test/CodeGen/X86/recip-fastmath.ll +++ test/CodeGen/X86/recip-fastmath.ll @@ -2,13 +2,13 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL-NO-FMA -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX ; If the target's divss/divps instructions are substantially ; slower than rcpss/rcpps with a Newton-Raphson refinement, @@ -26,59 +26,11 @@ ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-RECIP-LABEL: f32_no_estimate: -; AVX-RECIP: # %bb.0: -; AVX-RECIP-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX-RECIP-NEXT: vdivss %xmm0, %xmm1, %xmm0 -; AVX-RECIP-NEXT: retq -; -; FMA-RECIP-LABEL: f32_no_estimate: -; FMA-RECIP: # %bb.0: -; FMA-RECIP-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; FMA-RECIP-NEXT: vdivss %xmm0, %xmm1, %xmm0 -; FMA-RECIP-NEXT: retq -; -; BDVER2-LABEL: f32_no_estimate: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [9:9.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: f32_no_estimate: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [19:19.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: f32_no_estimate: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [14:14.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: f32_no_estimate: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:7.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; HASWELL-NO-FMA-LABEL: f32_no_estimate: -; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; HASWELL-NO-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0 -; HASWELL-NO-FMA-NEXT: retq -; -; KNL-LABEL: f32_no_estimate: -; KNL: # %bb.0: -; KNL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; KNL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:7.00] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: f32_no_estimate: -; SKX: # %bb.0: -; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [11:3.00] -; SKX-NEXT: retq # sched: [7:1.00] +; AVX-LABEL: f32_no_estimate: +; AVX: # %bb.0: +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq %div = fdiv fast float 1.0, %x ret float %div } @@ -114,37 +66,37 @@ ; ; BDVER2-LABEL: f32_one_step: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 +; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: f32_one_step: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: f32_one_step: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: f32_one_step: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem +; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: f32_one_step: ; HASWELL-NO-FMA: # %bb.0: @@ -156,19 +108,12 @@ ; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 ; HASWELL-NO-FMA-NEXT: retq ; -; KNL-LABEL: f32_one_step: -; KNL: # %bb.0: -; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: f32_one_step: -; SKX: # %bb.0: -; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; AVX512-LABEL: f32_one_step: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem +; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; AVX512-NEXT: retq %div = fdiv fast float 1.0, %x ret float %div } @@ -218,52 +163,52 @@ ; ; BDVER2-LABEL: f32_two_step: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50] -; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 +; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 +; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 +; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: f32_two_step: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [2:1.00] -; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] -; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [2:1.00] -; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] -; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2 +; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2 +; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2 +; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0 +; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: f32_two_step: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] -; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 +; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 +; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 +; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 +; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: f32_two_step: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; HASWELL-NEXT: vmovaps %xmm1, %xmm3 +; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 +; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 +; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 +; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: f32_two_step: ; HASWELL-NO-FMA: # %bb.0: @@ -279,27 +224,16 @@ ; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 ; HASWELL-NO-FMA-NEXT: retq ; -; KNL-LABEL: f32_two_step: -; KNL: # %bb.0: -; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: f32_two_step: -; SKX: # %bb.0: -; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; AVX512-LABEL: f32_two_step: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX512-NEXT: vmovaps %xmm1, %xmm3 +; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 +; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 +; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 +; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 +; AVX512-NEXT: retq %div = fdiv fast float 1.0, %x ret float %div } @@ -326,27 +260,27 @@ ; ; BDVER2-LABEL: v4f32_no_estimate: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50] -; BDVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [9:9.50] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BDVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v4f32_no_estimate: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [19:19.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v4f32_no_estimate: ; SANDY: # %bb.0: -; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:14.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v4f32_no_estimate: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:7.00] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v4f32_no_estimate: ; HASWELL-NO-FMA: # %bb.0: @@ -354,17 +288,11 @@ ; HASWELL-NO-FMA-NEXT: vdivps %xmm0, %xmm1, %xmm0 ; HASWELL-NO-FMA-NEXT: retq ; -; KNL-LABEL: v4f32_no_estimate: -; KNL: # %bb.0: -; KNL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; KNL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:7.00] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v4f32_no_estimate: -; SKX: # %bb.0: -; SKX-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; SKX-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [11:3.00] -; SKX-NEXT: retq # sched: [7:1.00] +; AVX512-LABEL: v4f32_no_estimate: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -400,38 +328,38 @@ ; ; BDVER2-LABEL: v4f32_one_step: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %xmm0, %xmm1 +; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 +; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v4f32_one_step: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vrcpps %xmm0, %xmm1 +; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v4f32_one_step: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 +; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v4f32_one_step: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %xmm0, %xmm1 +; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v4f32_one_step: ; HASWELL-NO-FMA: # %bb.0: @@ -445,18 +373,18 @@ ; ; KNL-LABEL: v4f32_one_step: ; KNL: # %bb.0: -; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50] -; KNL-NEXT: retq # sched: [7:1.00] +; KNL-NEXT: vrcpps %xmm0, %xmm1 +; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 +; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; KNL-NEXT: retq ; ; SKX-LABEL: v4f32_one_step: ; SKX: # %bb.0: -; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vrcpps %xmm0, %xmm1 +; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; SKX-NEXT: retq %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -506,52 +434,52 @@ ; ; BDVER2-LABEL: v4f32_two_step: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %xmm0, %xmm1 +; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 +; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 +; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 +; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v4f32_two_step: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [2:1.00] -; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] -; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [2:1.00] -; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] -; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vrcpps %xmm0, %xmm1 +; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 +; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 +; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2 +; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1 +; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0 +; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v4f32_two_step: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] -; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 +; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 +; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 +; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 +; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 +; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 +; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v4f32_two_step: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %xmm0, %xmm1 +; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vmovaps %xmm1, %xmm3 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v4f32_two_step: ; HASWELL-NO-FMA: # %bb.0: @@ -567,27 +495,16 @@ ; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 ; HASWELL-NO-FMA-NEXT: retq ; -; KNL-LABEL: v4f32_two_step: -; KNL: # %bb.0: -; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v4f32_two_step: -; SKX: # %bb.0: -; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; AVX512-LABEL: v4f32_two_step: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcpps %xmm0, %xmm1 +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vmovaps %xmm1, %xmm3 +; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 +; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 +; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 +; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 +; AVX512-NEXT: retq %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -617,27 +534,27 @@ ; ; BDVER2-LABEL: v8f32_no_estimate: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50] -; BDVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [9:19.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BDVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v8f32_no_estimate: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [38:38.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v8f32_no_estimate: ; SANDY: # %bb.0: -; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:28.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v8f32_no_estimate: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:14.00] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v8f32_no_estimate: ; HASWELL-NO-FMA: # %bb.0: @@ -645,17 +562,11 @@ ; HASWELL-NO-FMA-NEXT: vdivps %ymm0, %ymm1, %ymm0 ; HASWELL-NO-FMA-NEXT: retq ; -; KNL-LABEL: v8f32_no_estimate: -; KNL: # %bb.0: -; KNL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; KNL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:14.00] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v8f32_no_estimate: -; SKX: # %bb.0: -; SKX-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; SKX-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [11:5.00] -; SKX-NEXT: retq # sched: [7:1.00] +; AVX512-LABEL: v8f32_no_estimate: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 +; AVX512-NEXT: retq %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -698,38 +609,38 @@ ; ; BDVER2-LABEL: v8f32_one_step: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00] -; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm1 +; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 +; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v8f32_one_step: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vrcpps %ymm0, %ymm1 +; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 +; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v8f32_one_step: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %ymm0, %ymm1 +; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 +; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v8f32_one_step: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm1 +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v8f32_one_step: ; HASWELL-NO-FMA: # %bb.0: @@ -743,18 +654,18 @@ ; ; KNL-LABEL: v8f32_one_step: ; KNL: # %bb.0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50] -; KNL-NEXT: retq # sched: [7:1.00] +; KNL-NEXT: vrcpps %ymm0, %ymm1 +; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 +; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 +; KNL-NEXT: retq ; ; SKX-LABEL: v8f32_one_step: ; SKX: # %bb.0: -; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vrcpps %ymm0, %ymm1 +; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 +; SKX-NEXT: retq %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -817,52 +728,52 @@ ; ; BDVER2-LABEL: v8f32_two_step: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00] -; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm1 +; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 +; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 +; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 +; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v8f32_two_step: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vrcpps %ymm0, %ymm1 +; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 +; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 +; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2 +; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1 +; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v8f32_two_step: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %ymm0, %ymm1 +; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 +; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 +; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 +; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v8f32_two_step: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm1 +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vmovaps %ymm1, %ymm3 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v8f32_two_step: ; HASWELL-NO-FMA: # %bb.0: @@ -878,27 +789,16 @@ ; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 ; HASWELL-NO-FMA-NEXT: retq ; -; KNL-LABEL: v8f32_two_step: -; KNL: # %bb.0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [5:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v8f32_two_step: -; SKX: # %bb.0: -; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] -; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; AVX512-LABEL: v8f32_two_step: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcpps %ymm0, %ymm1 +; AVX512-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vmovaps %ymm1, %ymm3 +; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 +; AVX512-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 +; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 +; AVX512-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 +; AVX512-NEXT: retq %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -936,31 +836,31 @@ ; ; BDVER2-LABEL: v16f32_no_estimate: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50] -; BDVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [9:19.00] -; BDVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [9:19.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BDVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 +; BDVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v16f32_no_estimate: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [38:38.00] -; BTVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [38:38.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 +; BTVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v16f32_no_estimate: ; SANDY: # %bb.0: -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; SANDY-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [29:28.00] -; SANDY-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [29:28.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vdivps %ymm0, %ymm2, %ymm0 +; SANDY-NEXT: vdivps %ymm1, %ymm2, %ymm1 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v16f32_no_estimate: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [21:14.00] -; HASWELL-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [21:14.00] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vdivps %ymm0, %ymm2, %ymm0 +; HASWELL-NEXT: vdivps %ymm1, %ymm2, %ymm1 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v16f32_no_estimate: ; HASWELL-NO-FMA: # %bb.0: @@ -969,17 +869,11 @@ ; HASWELL-NO-FMA-NEXT: vdivps %ymm1, %ymm2, %ymm1 ; HASWELL-NO-FMA-NEXT: retq ; -; KNL-LABEL: v16f32_no_estimate: -; KNL: # %bb.0: -; KNL-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [10:1.00] -; KNL-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [21:14.00] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v16f32_no_estimate: -; SKX: # %bb.0: -; SKX-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [8:0.50] -; SKX-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [18:10.00] -; SKX-NEXT: retq # sched: [7:1.00] +; AVX512-LABEL: v16f32_no_estimate: +; AVX512: # %bb.0: +; AVX512-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0 +; AVX512-NEXT: retq %div = fdiv fast <16 x float> , %x ret <16 x float> %div } @@ -1045,55 +939,55 @@ ; ; BDVER2-LABEL: v16f32_one_step: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00] -; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50] -; BDVER2-NEXT: vrcpps %ymm1, %ymm4 # sched: [5:2.00] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm1, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm4, %ymm1, %ymm4, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm2 +; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BDVER2-NEXT: vrcpps %ymm1, %ymm4 +; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 +; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm1, %ymm1 +; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 +; BDVER2-NEXT: vfmaddps %ymm4, %ymm1, %ymm4, %ymm1 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v16f32_one_step: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00] -; BTVER2-NEXT: vrcpps %ymm1, %ymm4 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm4, %ymm1, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm4, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vaddps %ymm1, %ymm4, %ymm1 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vrcpps %ymm0, %ymm2 +; BTVER2-NEXT: vrcpps %ymm1, %ymm4 +; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; BTVER2-NEXT: vmulps %ymm4, %ymm1, %ymm1 +; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1 +; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 +; BTVER2-NEXT: vmulps %ymm1, %ymm4, %ymm1 +; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; BTVER2-NEXT: vaddps %ymm1, %ymm4, %ymm1 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v16f32_one_step: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00] -; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00] -; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:1.00] -; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %ymm0, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 +; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; SANDY-NEXT: vrcpps %ymm1, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 +; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 +; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v16f32_one_step: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NEXT: vrcpps %ymm1, %ymm4 # sched: [11:2.00] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4 sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm2 +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vrcpps %ymm1, %ymm4 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v16f32_one_step: ; HASWELL-NO-FMA: # %bb.0: @@ -1110,19 +1004,12 @@ ; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1 ; HASWELL-NO-FMA-NEXT: retq ; -; KNL-LABEL: v16f32_one_step: -; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00] -; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v16f32_one_step: -; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00] -; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; AVX512-LABEL: v16f32_one_step: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcp14ps %zmm0, %zmm1 +; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem +; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 +; AVX512-NEXT: retq %div = fdiv fast <16 x float> , %x ret <16 x float> %div } @@ -1226,81 +1113,81 @@ ; ; BDVER2-LABEL: v16f32_two_step: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00] -; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm2 +; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 +; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 +; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 +; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 +; BDVER2-NEXT: vrcpps %ymm1, %ymm2 +; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 +; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 +; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 +; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v16f32_two_step: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm0, %ymm4, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm1, %ymm4, %ymm1 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vrcpps %ymm0, %ymm2 +; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3 +; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 +; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 +; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 +; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; BTVER2-NEXT: vsubps %ymm0, %ymm4, %ymm0 +; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 +; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; BTVER2-NEXT: vrcpps %ymm1, %ymm2 +; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3 +; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 +; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 +; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 +; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 +; BTVER2-NEXT: vsubps %ymm1, %ymm4, %ymm1 +; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v16f32_two_step: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00] -; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vsubps %ymm0, %ymm4, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00] -; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [5:1.00] -; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:1.00] -; SANDY-NEXT: vsubps %ymm1, %ymm4, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %ymm0, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3 +; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 +; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 +; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; SANDY-NEXT: vsubps %ymm0, %ymm4, %ymm0 +; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 +; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; SANDY-NEXT: vrcpps %ymm1, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3 +; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 +; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 +; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 +; SANDY-NEXT: vsubps %ymm1, %ymm4, %ymm1 +; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v16f32_two_step: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NEXT: vmovaps %ymm2, %ymm4 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4 sched: [5:0.50] -; HASWELL-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00] -; HASWELL-NEXT: vmovaps %ymm2, %ymm4 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4 sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm2 +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vmovaps %ymm2, %ymm4 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4 +; HASWELL-NEXT: vrcpps %ymm1, %ymm2 +; HASWELL-NEXT: vmovaps %ymm2, %ymm4 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v16f32_two_step: ; HASWELL-NO-FMA: # %bb.0: @@ -1325,27 +1212,16 @@ ; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1 ; HASWELL-NO-FMA-NEXT: retq ; -; KNL-LABEL: v16f32_two_step: -; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00] -; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [10:1.00] -; KNL-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [5:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v16f32_two_step: -; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00] -; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [8:0.50] -; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50] -; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; AVX512-LABEL: v16f32_two_step: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcp14ps %zmm0, %zmm1 +; AVX512-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vmovaps %zmm1, %zmm3 +; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 +; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 +; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 +; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 +; AVX512-NEXT: retq %div = fdiv fast <16 x float> , %x ret <16 x float> %div } Index: test/CodeGen/X86/recip-fastmath2.ll =================================================================== --- test/CodeGen/X86/recip-fastmath2.ll +++ test/CodeGen/X86/recip-fastmath2.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL-NO-FMA -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL-NO-FMA +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX ; It's the extra tests coverage for recip as discussed on D26855. @@ -19,59 +19,11 @@ ; SSE-NEXT: mulss {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; -; AVX-RECIP-LABEL: f32_no_step_2: -; AVX-RECIP: # %bb.0: -; AVX-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm0 -; AVX-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 -; AVX-RECIP-NEXT: retq -; -; FMA-RECIP-LABEL: f32_no_step_2: -; FMA-RECIP: # %bb.0: -; FMA-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm0 -; FMA-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 -; FMA-RECIP-NEXT: retq -; -; BDVER2-LABEL: f32_no_step_2: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: f32_no_step_2: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: f32_no_step_2: -; SANDY: # %bb.0: -; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: f32_no_step_2: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; HASWELL-NO-FMA-LABEL: f32_no_step_2: -; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: f32_no_step_2: -; KNL: # %bb.0: -; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: f32_no_step_2: -; SKX: # %bb.0: -; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; AVX-LABEL: f32_no_step_2: +; AVX: # %bb.0: +; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq %div = fdiv fast float 1234.0, %x ret float %div } @@ -110,68 +62,60 @@ ; ; BDVER2-LABEL: f32_one_step_2: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 +; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 +; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: f32_one_step_2: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: f32_one_step_2: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: f32_one_step_2: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem +; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: f32_one_step_2: ; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: f32_one_step_2: -; KNL: # %bb.0: -; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50] -; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: f32_one_step_2: -; SKX: # %bb.0: -; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] -; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 +; HASWELL-NO-FMA-NEXT: retq +; +; AVX512-LABEL: f32_one_step_2: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem +; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: retq %div = fdiv fast float 3456.0, %x ret float %div } @@ -213,75 +157,66 @@ ; ; BDVER2-LABEL: f32_one_step_2_divs: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:1.00] -; BDVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 +; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 +; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 +; BDVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: f32_one_step_2_divs: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [7:1.00] -; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 +; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: f32_one_step_2_divs: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00] -; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 +; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: f32_one_step_2_divs: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50] -; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem +; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 +; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: f32_one_step_2_divs: ; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50] -; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: f32_one_step_2_divs: -; KNL: # %bb.0: -; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50] -; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50] -; KNL-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: f32_one_step_2_divs: -; SKX: # %bb.0: -; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] -; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] -; SKX-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 +; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; HASWELL-NO-FMA-NEXT: retq +; +; AVX512-LABEL: f32_one_step_2_divs: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem +; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 +; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq %div = fdiv fast float 3456.0, %x %div2 = fdiv fast float %div, %x ret float %div2 @@ -335,95 +270,83 @@ ; ; BDVER2-LABEL: f32_two_step_2: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50] -; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 +; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 +; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 +; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 +; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: f32_two_step_2: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [2:1.00] -; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] -; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [2:1.00] -; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] -; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2 +; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2 +; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2 +; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0 +; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: f32_two_step_2: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] -; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 +; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 +; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 +; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 +; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: f32_two_step_2: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50] -; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; HASWELL-NEXT: vmovaps %xmm1, %xmm3 +; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 +; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 +; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 +; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 +; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: f32_two_step_2: ; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: f32_two_step_2: -; KNL: # %bb.0: -; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50] -; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: f32_two_step_2: -; SKX: # %bb.0: -; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50] -; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2 +; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2 +; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2 +; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0 +; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 +; HASWELL-NO-FMA-NEXT: retq +; +; AVX512-LABEL: f32_two_step_2: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1 +; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; AVX512-NEXT: vmovaps %xmm1, %xmm3 +; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 +; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 +; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 +; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: retq %div = fdiv fast float 6789.0, %x ret float %div } @@ -462,70 +385,70 @@ ; ; BDVER2-LABEL: v4f32_one_step2: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %xmm0, %xmm1 +; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 +; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 +; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v4f32_one_step2: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vrcpps %xmm0, %xmm1 +; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v4f32_one_step2: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 +; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v4f32_one_step2: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %xmm0, %xmm1 +; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v4f32_one_step2: ; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 +; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 +; HASWELL-NO-FMA-NEXT: retq ; ; KNL-LABEL: v4f32_one_step2: ; KNL: # %bb.0: -; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50] -; KNL-NEXT: retq # sched: [7:1.00] +; KNL-NEXT: vrcpps %xmm0, %xmm1 +; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 +; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 +; KNL-NEXT: retq ; ; SKX-LABEL: v4f32_one_step2: ; SKX: # %bb.0: -; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vrcpps %xmm0, %xmm1 +; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 +; SKX-NEXT: retq %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -567,77 +490,77 @@ ; ; BDVER2-LABEL: v4f32_one_step_2_divs: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] -; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:1.00] -; BDVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %xmm0, %xmm1 +; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 +; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 +; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 +; BDVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v4f32_one_step_2_divs: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [7:1.00] -; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vrcpps %xmm0, %xmm1 +; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 +; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v4f32_one_step_2_divs: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00] -; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 +; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 +; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v4f32_one_step_2_divs: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:0.50] -; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %xmm0, %xmm1 +; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 +; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v4f32_one_step_2_divs: ; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:0.50] -; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 +; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 +; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; HASWELL-NO-FMA-NEXT: retq ; ; KNL-LABEL: v4f32_one_step_2_divs: ; KNL: # %bb.0: -; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:0.50] -; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; KNL-NEXT: retq # sched: [7:1.00] +; KNL-NEXT: vrcpps %xmm0, %xmm1 +; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 +; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 +; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; KNL-NEXT: retq ; ; SKX-LABEL: v4f32_one_step_2_divs: ; SKX: # %bb.0: -; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50] -; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vrcpps %xmm0, %xmm1 +; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 +; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 +; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; SKX-NEXT: retq %div = fdiv fast <4 x float> , %x %div2 = fdiv fast <4 x float> %div, %x ret <4 x float> %div2 @@ -691,95 +614,83 @@ ; ; BDVER2-LABEL: v4f32_two_step2: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %xmm0, %xmm1 +; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 +; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 +; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 +; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 +; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v4f32_two_step2: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [2:1.00] -; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] -; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [2:1.00] -; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] -; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vrcpps %xmm0, %xmm1 +; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 +; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 +; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2 +; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1 +; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0 +; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v4f32_two_step2: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] -; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 +; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 +; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 +; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 +; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 +; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 +; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v4f32_two_step2: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %xmm0, %xmm1 +; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vmovaps %xmm1, %xmm3 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v4f32_two_step2: ; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: v4f32_two_step2: -; KNL: # %bb.0: -; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v4f32_two_step2: -; SKX: # %bb.0: -; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50] -; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 +; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 +; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 +; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 +; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0 +; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 +; HASWELL-NO-FMA-NEXT: retq +; +; AVX512-LABEL: v4f32_two_step2: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcpps %xmm0, %xmm1 +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vmovaps %xmm1, %xmm3 +; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 +; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 +; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 +; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 +; AVX512-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: retq %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -826,70 +737,70 @@ ; ; BDVER2-LABEL: v8f32_one_step2: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00] -; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm1 +; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 +; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v8f32_one_step2: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vrcpps %ymm0, %ymm1 +; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 +; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v8f32_one_step2: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %ymm0, %ymm1 +; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 +; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v8f32_one_step2: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm1 +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v8f32_one_step2: ; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; HASWELL-NO-FMA-NEXT: retq ; ; KNL-LABEL: v8f32_one_step2: ; KNL: # %bb.0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] -; KNL-NEXT: retq # sched: [7:1.00] +; KNL-NEXT: vrcpps %ymm0, %ymm1 +; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 +; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 +; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; KNL-NEXT: retq ; ; SKX-LABEL: v8f32_one_step2: ; SKX: # %bb.0: -; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vrcpps %ymm0, %ymm1 +; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 +; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; SKX-NEXT: retq %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -940,77 +851,77 @@ ; ; BDVER2-LABEL: v8f32_one_step_2_divs: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00] -; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] -; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [10:2.00] -; BDVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm1 +; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 +; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 +; BDVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v8f32_one_step_2_divs: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [7:2.00] -; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vrcpps %ymm0, %ymm1 +; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 +; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 +; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v8f32_one_step_2_divs: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:1.00] -; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %ymm0, %ymm1 +; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 +; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 +; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v8f32_one_step_2_divs: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:0.50] -; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm1 +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 +; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v8f32_one_step_2_divs: ; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:0.50] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; HASWELL-NO-FMA-NEXT: retq ; ; KNL-LABEL: v8f32_one_step_2_divs: ; KNL: # %bb.0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:0.50] -; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; KNL-NEXT: retq # sched: [7:1.00] +; KNL-NEXT: vrcpps %ymm0, %ymm1 +; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 +; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 +; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 +; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; KNL-NEXT: retq ; ; SKX-LABEL: v8f32_one_step_2_divs: ; SKX: # %bb.0: -; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [11:0.50] -; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; SKX-NEXT: vrcpps %ymm0, %ymm1 +; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 +; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 +; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; SKX-NEXT: retq %div = fdiv fast <8 x float> , %x %div2 = fdiv fast <8 x float> %div, %x ret <8 x float> %div2 @@ -1078,95 +989,83 @@ ; ; BDVER2-LABEL: v8f32_two_step2: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00] -; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm1 +; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 +; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 +; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 +; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v8f32_two_step2: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vrcpps %ymm0, %ymm1 +; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 +; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 +; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2 +; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1 +; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v8f32_two_step2: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %ymm0, %ymm1 +; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 +; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 +; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 +; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v8f32_two_step2: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm1 +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vmovaps %ymm1, %ymm3 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v8f32_two_step2: ; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: v8f32_two_step2: -; KNL: # %bb.0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v8f32_two_step2: -; SKX: # %bb.0: -; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] -; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 +; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 +; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 +; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 +; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; HASWELL-NO-FMA-NEXT: retq +; +; AVX512-LABEL: v8f32_two_step2: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcpps %ymm0, %ymm1 +; AVX512-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vmovaps %ymm1, %ymm3 +; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 +; AVX512-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 +; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 +; AVX512-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 +; AVX512-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; AVX512-NEXT: retq %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -1178,50 +1077,10 @@ ; SSE-NEXT: rcpps %xmm1, %xmm1 ; SSE-NEXT: retq ; -; AVX-RECIP-LABEL: v8f32_no_step: -; AVX-RECIP: # %bb.0: -; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm0 -; AVX-RECIP-NEXT: retq -; -; FMA-RECIP-LABEL: v8f32_no_step: -; FMA-RECIP: # %bb.0: -; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm0 -; FMA-RECIP-NEXT: retq -; -; BDVER2-LABEL: v8f32_no_step: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: v8f32_no_step: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: v8f32_no_step: -; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: v8f32_no_step: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; HASWELL-NO-FMA-LABEL: v8f32_no_step: -; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: v8f32_no_step: -; KNL: # %bb.0: -; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v8f32_no_step: -; SKX: # %bb.0: -; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: retq # sched: [7:1.00] +; AVX-LABEL: v8f32_no_step: +; AVX: # %bb.0: +; AVX-NEXT: vrcpps %ymm0, %ymm0 +; AVX-NEXT: retq %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -1235,59 +1094,11 @@ ; SSE-NEXT: mulps {{.*}}(%rip), %xmm1 ; SSE-NEXT: retq ; -; AVX-RECIP-LABEL: v8f32_no_step2: -; AVX-RECIP: # %bb.0: -; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm0 -; AVX-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 -; AVX-RECIP-NEXT: retq -; -; FMA-RECIP-LABEL: v8f32_no_step2: -; FMA-RECIP: # %bb.0: -; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm0 -; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 -; FMA-RECIP-NEXT: retq -; -; BDVER2-LABEL: v8f32_no_step2: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: v8f32_no_step2: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: v8f32_no_step2: -; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: v8f32_no_step2: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; HASWELL-NO-FMA-LABEL: v8f32_no_step2: -; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: v8f32_no_step2: -; KNL: # %bb.0: -; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v8f32_no_step2: -; SKX: # %bb.0: -; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; AVX-LABEL: v8f32_no_step2: +; AVX: # %bb.0: +; AVX-NEXT: vrcpps %ymm0, %ymm0 +; AVX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; AVX-NEXT: retq %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -1361,96 +1172,88 @@ ; ; BDVER2-LABEL: v16f32_one_step2: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00] -; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50] -; BDVER2-NEXT: vrcpps %ymm0, %ymm4 # sched: [5:2.00] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm4, %ymm0, %ymm4, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [10:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %ymm1, %ymm2 +; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BDVER2-NEXT: vrcpps %ymm0, %ymm4 +; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 +; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm0, %ymm0 +; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 +; BDVER2-NEXT: vfmaddps %ymm4, %ymm0, %ymm4, %ymm0 +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v16f32_one_step2: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00] -; BTVER2-NEXT: vrcpps %ymm0, %ymm4 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm4, %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:2.00] -; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm0, %ymm4, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:2.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm4, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00] -; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vrcpps %ymm1, %ymm2 +; BTVER2-NEXT: vrcpps %ymm0, %ymm4 +; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 +; BTVER2-NEXT: vmulps %ymm4, %ymm0, %ymm0 +; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1 +; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; BTVER2-NEXT: vmulps %ymm0, %ymm4, %ymm0 +; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; BTVER2-NEXT: vaddps %ymm0, %ymm4, %ymm0 +; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v16f32_one_step2: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00] -; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00] -; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %ymm1, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 +; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 +; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; SANDY-NEXT: vrcpps %ymm0, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 +; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v16f32_one_step2: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NEXT: vrcpps %ymm0, %ymm4 # sched: [11:2.00] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4 sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %ymm1, %ymm2 +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vrcpps %ymm0, %ymm4 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v16f32_one_step2: ; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: v16f32_one_step2: -; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00] -; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v16f32_one_step2: -; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00] -; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2 +; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm1 +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm3, %ymm1 +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2 +; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0 +; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; HASWELL-NO-FMA-NEXT: retq +; +; AVX512-LABEL: v16f32_one_step2: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcp14ps %zmm0, %zmm1 +; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem +; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 +; AVX512-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 +; AVX512-NEXT: retq %div = fdiv fast <16 x float> , %x ret <16 x float> %div } @@ -1532,108 +1335,99 @@ ; ; BDVER2-LABEL: v16f32_one_step_2_divs: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00] -; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [10:2.00] -; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [10:2.00] -; BDVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm2 +; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 +; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 +; BDVER2-NEXT: vrcpps %ymm1, %ymm2 +; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 +; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 +; BDVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0 +; BDVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v16f32_one_step_2_divs: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:2.00] -; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [7:2.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:2.00] -; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [7:2.00] -; BTVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vrcpps %ymm0, %ymm2 +; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 +; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; BTVER2-NEXT: vrcpps %ymm1, %ymm2 +; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 +; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1 +; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 +; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 +; BTVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0 +; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v16f32_one_step_2_divs: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00] -; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vrcpps %ymm1, %ymm4 # sched: [7:2.00] -; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm4, %ymm1, %ymm1 # sched: [5:1.00] -; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm1, %ymm4, %ymm1 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm1, %ymm4, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [12:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [12:1.00] -; SANDY-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %ymm0, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; SANDY-NEXT: vrcpps %ymm1, %ymm4 +; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 +; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; SANDY-NEXT: vmulps %ymm4, %ymm1, %ymm1 +; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 +; SANDY-NEXT: vmulps %ymm1, %ymm4, %ymm1 +; SANDY-NEXT: vaddps %ymm1, %ymm4, %ymm1 +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 +; SANDY-NEXT: vmulps %ymm0, %ymm3, %ymm0 +; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v16f32_one_step_2_divs: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [12:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [12:0.50] -; HASWELL-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm2 +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 +; HASWELL-NEXT: vrcpps %ymm1, %ymm2 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 +; HASWELL-NEXT: vmulps %ymm0, %ymm3, %ymm0 +; HASWELL-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v16f32_one_step_2_divs: ; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm4 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm4, %ymm1, %ymm1 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm4, %ymm1 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm4, %ymm1 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [12:0.50] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [12:0.50] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: v16f32_one_step_2_divs: -; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00] -; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1 # sched: [12:0.50] -; KNL-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v16f32_one_step_2_divs: -; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00] -; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1 # sched: [11:0.50] -; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2 +; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm4 +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0 +; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps %ymm4, %ymm1, %ymm1 +; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm3, %ymm1 +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm4, %ymm1 +; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm4, %ymm1 +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm3, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; HASWELL-NO-FMA-NEXT: retq +; +; AVX512-LABEL: v16f32_one_step_2_divs: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcp14ps %zmm0, %zmm1 +; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem +; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 +; AVX512-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1 +; AVX512-NEXT: vmulps %zmm0, %zmm1, %zmm0 +; AVX512-NEXT: retq %div = fdiv fast <16 x float> , %x %div2 = fdiv fast <16 x float> %div, %x ret <16 x float> %div2 @@ -1745,138 +1539,126 @@ ; ; BDVER2-LABEL: v16f32_two_step2: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00] -; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [10:2.00] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50] -; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %ymm1, %ymm2 +; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 +; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 +; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 +; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 +; BDVER2-NEXT: vrcpps %ymm0, %ymm2 +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 +; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 +; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 +; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v16f32_two_step2: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00] -; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm1, %ymm4, %ymm1 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:2.00] -; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00] -; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [7:2.00] -; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vsubps %ymm0, %ymm4, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; BTVER2-NEXT: vrcpps %ymm1, %ymm2 +; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3 +; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 +; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 +; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 +; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 +; BTVER2-NEXT: vsubps %ymm1, %ymm4, %ymm1 +; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; BTVER2-NEXT: vrcpps %ymm0, %ymm2 +; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3 +; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 +; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 +; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 +; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; BTVER2-NEXT: vsubps %ymm0, %ymm4, %ymm0 +; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 +; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v16f32_two_step2: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00] -; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:1.00] -; SANDY-NEXT: vsubps %ymm1, %ymm4, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00] -; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [5:1.00] -; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vsubps %ymm0, %ymm4, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %ymm1, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3 +; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 +; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 +; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 +; SANDY-NEXT: vsubps %ymm1, %ymm4, %ymm1 +; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; SANDY-NEXT: vrcpps %ymm0, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3 +; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 +; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 +; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; SANDY-NEXT: vsubps %ymm0, %ymm4, %ymm0 +; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 +; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v16f32_two_step2: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NEXT: vmovaps %ymm2, %ymm4 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4 sched: [5:0.50] -; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00] -; HASWELL-NEXT: vmovaps %ymm2, %ymm4 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3 sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4 sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %ymm1, %ymm2 +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NEXT: vmovaps %ymm2, %ymm4 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4 +; HASWELL-NEXT: vrcpps %ymm0, %ymm2 +; HASWELL-NEXT: vmovaps %ymm2, %ymm4 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 +; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3 +; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v16f32_two_step2: ; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm4, %ymm1 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm4, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: v16f32_two_step2: -; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00] -; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [10:1.00] -; KNL-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [5:0.50] -; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [5:0.50] -; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v16f32_two_step2: -; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00] -; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [8:0.50] -; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50] -; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2 +; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm3 +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3 +; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3 +; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2 +; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm1 +; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm4, %ymm1 +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2 +; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm3 +; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3 +; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3 +; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2 +; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm4, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0 +; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; HASWELL-NO-FMA-NEXT: retq +; +; AVX512-LABEL: v16f32_two_step2: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcp14ps %zmm0, %zmm1 +; AVX512-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512-NEXT: vmovaps %zmm1, %zmm3 +; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 +; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 +; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 +; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 +; AVX512-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 +; AVX512-NEXT: retq %div = fdiv fast <16 x float> , %x ret <16 x float> %div } @@ -1904,43 +1686,38 @@ ; ; BDVER2-LABEL: v16f32_no_step: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [5:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm0 +; BDVER2-NEXT: vrcpps %ymm1, %ymm1 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v16f32_no_step: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vrcpps %ymm0, %ymm0 +; BTVER2-NEXT: vrcpps %ymm1, %ymm1 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v16f32_no_step: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; SANDY-NEXT: vrcpps %ymm1, %ymm1 # sched: [7:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %ymm0, %ymm0 +; SANDY-NEXT: vrcpps %ymm1, %ymm1 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v16f32_no_step: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; HASWELL-NEXT: vrcpps %ymm1, %ymm1 # sched: [11:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm0 +; HASWELL-NEXT: vrcpps %ymm1, %ymm1 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v16f32_no_step: ; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm1 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: v16f32_no_step: -; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [11:2.00] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v16f32_no_step: -; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [4:2.00] -; SKX-NEXT: retq # sched: [7:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 +; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm1 +; HASWELL-NO-FMA-NEXT: retq +; +; AVX512-LABEL: v16f32_no_step: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcp14ps %zmm0, %zmm0 +; AVX512-NEXT: retq %div = fdiv fast <16 x float> , %x ret <16 x float> %div } @@ -1976,55 +1753,49 @@ ; ; BDVER2-LABEL: v16f32_no_step2: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [5:2.00] -; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [10:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] +; BDVER2-NEXT: vrcpps %ymm1, %ymm1 +; BDVER2-NEXT: vrcpps %ymm0, %ymm0 +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; BDVER2-NEXT: retq ; ; BTVER2-LABEL: v16f32_no_step2: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [2:2.00] -; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00] -; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00] -; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] +; BTVER2-NEXT: vrcpps %ymm1, %ymm1 +; BTVER2-NEXT: vrcpps %ymm0, %ymm0 +; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; BTVER2-NEXT: retq ; ; SANDY-LABEL: v16f32_no_step2: ; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %ymm1, %ymm1 # sched: [7:2.00] -; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vrcpps %ymm1, %ymm1 +; SANDY-NEXT: vrcpps %ymm0, %ymm0 +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; SANDY-NEXT: retq ; ; HASWELL-LABEL: v16f32_no_step2: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %ymm1, %ymm1 # sched: [11:2.00] -; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] +; HASWELL-NEXT: vrcpps %ymm1, %ymm1 +; HASWELL-NEXT: vrcpps %ymm0, %ymm0 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; HASWELL-NEXT: retq ; ; HASWELL-NO-FMA-LABEL: v16f32_no_step2: ; HASWELL-NO-FMA: # %bb.0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm1 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00] -; -; KNL-LABEL: v16f32_no_step2: -; KNL: # %bb.0: -; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [11:2.00] -; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50] -; KNL-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: v16f32_no_step2: -; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [4:2.00] -; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: retq # sched: [7:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm1 +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 +; HASWELL-NO-FMA-NEXT: retq +; +; AVX512-LABEL: v16f32_no_step2: +; AVX512: # %bb.0: +; AVX512-NEXT: vrcp14ps %zmm0, %zmm0 +; AVX512-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 +; AVX512-NEXT: retq %div = fdiv fast <16 x float> , %x ret <16 x float> %div } Index: test/CodeGen/X86/rtm-schedule.ll =================================================================== --- test/CodeGen/X86/rtm-schedule.ll +++ test/CodeGen/X86/rtm-schedule.ll @@ -1,62 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=x86-64 -mattr=+rtm | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=SKL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=cannonlake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=CNL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=icelake-client | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=ICL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=icelake-server | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=ICL - -define i32 @test_xbegin() nounwind uwtable { -; GENERIC-LABEL: test_xbegin: -; GENERIC: # %bb.0: -; GENERIC-NEXT: xbegin .LBB0_2 # sched: [100:0.33] -; GENERIC-NEXT: # %bb.1: -; GENERIC-NEXT: movl $-1, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; GENERIC-NEXT: .LBB0_2: -; GENERIC-NEXT: # XABORT DEF # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SKYLAKE-LABEL: test_xbegin: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: xbegin .LBB0_2 # sched: [100:0.25] -; SKYLAKE-NEXT: # %bb.1: -; SKYLAKE-NEXT: movl $-1, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; SKYLAKE-NEXT: .LBB0_2: -; SKYLAKE-NEXT: # XABORT DEF # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] - %1 = tail call i32 @llvm.x86.xbegin() nounwind - ret i32 %1 -} -declare i32 @llvm.x86.xbegin() nounwind - -define void @test_xend() nounwind uwtable { -; GENERIC-LABEL: test_xend: -; GENERIC: # %bb.0: -; GENERIC-NEXT: xend # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SKYLAKE-LABEL: test_xend: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: xend # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] - tail call void @llvm.x86.xend() nounwind - ret void -} -declare void @llvm.x86.xend() nounwind - -define void @test_xabort() nounwind uwtable { -; GENERIC-LABEL: test_xabort: -; GENERIC: # %bb.0: -; GENERIC-NEXT: xabort $2 # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SKYLAKE-LABEL: test_xabort: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: xabort $2 # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] - tail call void @llvm.x86.xabort(i8 2) - ret void -} -declare void @llvm.x86.xabort(i8) nounwind Index: test/CodeGen/X86/schedule-x86-64-shld.ll =================================================================== --- test/CodeGen/X86/schedule-x86-64-shld.ll +++ test/CodeGen/X86/schedule-x86-64-shld.ll @@ -1,471 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 - - -; uint64_t lshift10(uint64_t a, uint64_t b) -; { -; return (a << 10) | (b >> 54); -; } - -define i64 @lshift10_optsize(i64 %a, i64 %b) nounwind readnone optsize { -; GENERIC-LABEL: lshift10_optsize: -; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: lshift10_optsize: -; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BDVER12-NEXT: shldq $10, %rsi, %rax # sched: [4:3.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: lshift10_optsize: -; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BTVER2-NEXT: shldq $10, %rsi, %rax # sched: [3:3.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -entry: - %shl = shl i64 %a, 10 - %shr = lshr i64 %b, 54 - %or = or i64 %shr, %shl - ret i64 %or -} - -define i64 @lshift10(i64 %a, i64 %b) nounwind readnone { -; GENERIC-LABEL: lshift10: -; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: lshift10: -; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: shlq $10, %rdi # sched: [1:0.50] -; BDVER12-NEXT: shrq $54, %rsi # sched: [1:0.50] -; BDVER12-NEXT: leaq (%rsi,%rdi), %rax # sched: [1:0.50] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: lshift10: -; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: shlq $10, %rdi # sched: [1:0.50] -; BTVER2-NEXT: shrq $54, %rsi # sched: [1:0.50] -; BTVER2-NEXT: leaq (%rsi,%rdi), %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -entry: - %shl = shl i64 %a, 10 - %shr = lshr i64 %b, 54 - %or = or i64 %shr, %shl - ret i64 %or -} - -; uint64_t rshift10(uint64_t a, uint64_t b) -; { -; return (a >> 62) | (b << 2); -; } - -; Should be done via shld -define i64 @rshift10_optsize(i64 %a, i64 %b) nounwind readnone optsize { -; GENERIC-LABEL: rshift10_optsize: -; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: rshift10_optsize: -; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BDVER12-NEXT: shrdq $62, %rsi, %rax # sched: [4:3.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: rshift10_optsize: -; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BTVER2-NEXT: shrdq $62, %rsi, %rax # sched: [3:3.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -entry: - %shl = lshr i64 %a, 62 - %shr = shl i64 %b, 2 - %or = or i64 %shr, %shl - ret i64 %or -} - -; Should be done via lea (x,y,4),z -define i64 @rshift10(i64 %a, i64 %b) nounwind readnone { -; GENERIC-LABEL: rshift10: -; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: rshift10: -; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: shrq $62, %rdi # sched: [1:0.50] -; BDVER12-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: rshift10: -; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: shrq $62, %rdi # sched: [1:0.50] -; BTVER2-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -entry: - %shl = lshr i64 %a, 62 - %shr = shl i64 %b, 2 - %or = or i64 %shr, %shl - ret i64 %or -} - -;uint64_t lshift(uint64_t a, uint64_t b, uint64_t c) -;{ -; return (a << c) | (b >> (64-c)); -;} - -define i64 @lshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize { -; GENERIC-LABEL: lshift_cl_optsize: -; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33] -; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx -; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: lshift_cl_optsize: -; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50] -; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx -; BDVER12-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: lshift_cl_optsize: -; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx -; BTVER2-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -entry: - %shl = shl i64 %a, %c - %sub = sub nsw i64 64, %c - %shr = lshr i64 %b, %sub - %or = or i64 %shr, %shl - ret i64 %or -} - -define i64 @lshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone { -; GENERIC-LABEL: lshift_cl: -; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33] -; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx -; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: lshift_cl: -; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50] -; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.50] -; BDVER12-NEXT: shlq %cl, %rdi # sched: [1:0.50] -; BDVER12-NEXT: negb %cl # sched: [1:0.50] -; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx -; BDVER12-NEXT: shrq %cl, %rax # sched: [1:0.50] -; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.50] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: lshift_cl: -; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] -; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50] -; BTVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: negb %cl # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx -; BTVER2-NEXT: shrq %cl, %rax # sched: [1:0.50] -; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -entry: - %shl = shl i64 %a, %c - %sub = sub nsw i64 64, %c - %shr = lshr i64 %b, %sub - %or = or i64 %shr, %shl - ret i64 %or -} - - -;uint64_t rshift(uint64_t a, uint64_t b, int c) -;{ -; return (a >> c) | (b << (64-c)); -;} - -define i64 @rshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize { -; GENERIC-LABEL: rshift_cl_optsize: -; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33] -; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx -; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: rshift_cl_optsize: -; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50] -; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx -; BDVER12-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: rshift_cl_optsize: -; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx -; BTVER2-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -entry: - %shr = lshr i64 %a, %c - %sub = sub nsw i64 64, %c - %shl = shl i64 %b, %sub - %or = or i64 %shr, %shl - ret i64 %or -} - -define i64 @rshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone { -; GENERIC-LABEL: rshift_cl: -; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33] -; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx -; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: rshift_cl: -; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50] -; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.50] -; BDVER12-NEXT: shrq %cl, %rdi # sched: [1:0.50] -; BDVER12-NEXT: negb %cl # sched: [1:0.50] -; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx -; BDVER12-NEXT: shlq %cl, %rax # sched: [1:0.50] -; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.50] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: rshift_cl: -; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] -; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50] -; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: negb %cl # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx -; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50] -; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -entry: - %shr = lshr i64 %a, %c - %sub = sub nsw i64 64, %c - %shl = shl i64 %b, %sub - %or = or i64 %shr, %shl - ret i64 %or -} - -; extern uint64_t x; -;void lshift(uint64_t a, uint64_t b, uint_64_t c) -;{ -; x = (x << c) | (a >> (64-c)); -;} -@x = global i64 0, align 4 - -define void @lshift_mem_cl_optsize(i64 %a, i64 %c) nounwind readnone optsize { -; GENERIC-LABEL: lshift_mem_cl_optsize: -; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movq %rsi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx -; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: lshift_mem_cl_optsize: -; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.50] -; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx -; BDVER12-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [4:11.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: lshift_mem_cl_optsize: -; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx -; BTVER2-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [9:11.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -entry: - %b = load i64, i64* @x - %shl = shl i64 %b, %c - %sub = sub nsw i64 64, %c - %shr = lshr i64 %a, %sub - %or = or i64 %shl, %shr - store i64 %or, i64* @x - ret void -} - -define void @lshift_mem_cl(i64 %a, i64 %c) nounwind readnone { -; GENERIC-LABEL: lshift_mem_cl: -; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movq %rsi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx -; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: lshift_mem_cl: -; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50] -; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.50] -; BDVER12-NEXT: shlq %cl, %rax # sched: [1:0.50] -; BDVER12-NEXT: negb %cl # sched: [1:0.50] -; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx -; BDVER12-NEXT: shrq %cl, %rdi # sched: [1:0.50] -; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.50] -; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: lshift_mem_cl: -; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [3:1.00] -; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50] -; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50] -; BTVER2-NEXT: negb %cl # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx -; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50] -; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -entry: - %b = load i64, i64* @x - %shl = shl i64 %b, %c - %sub = sub nsw i64 64, %c - %shr = lshr i64 %a, %sub - %or = or i64 %shl, %shr - store i64 %or, i64* @x - ret void -} - -define void @lshift_mem(i64 %a) nounwind readnone { -; GENERIC-LABEL: lshift_mem: -; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: lshift_mem: -; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50] -; BDVER12-NEXT: shrq $54, %rdi # sched: [1:0.50] -; BDVER12-NEXT: shlq $10, %rax # sched: [1:0.50] -; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.50] -; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: lshift_mem: -; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [3:1.00] -; BTVER2-NEXT: shrq $54, %rdi # sched: [1:0.50] -; BTVER2-NEXT: shlq $10, %rax # sched: [1:0.50] -; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50] -; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -entry: - %b = load i64, i64* @x - %shl = shl i64 %b, 10 - %shr = lshr i64 %a, 54 - %or = or i64 %shr, %shl - store i64 %or, i64* @x - ret void -} - -define void @lshift_mem_optsize(i64 %a) nounwind readnone optsize { -; GENERIC-LABEL: lshift_mem_optsize: -; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: lshift_mem_optsize: -; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [4:11.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: lshift_mem_optsize: -; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [9:11.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -entry: - %b = load i64, i64* @x - %shl = shl i64 %b, 10 - %shr = lshr i64 %a, 54 - %or = or i64 %shr, %shl - store i64 %or, i64* @x - ret void -} - -define void @lshift_mem_b(i64 %b) nounwind readnone { -; GENERIC-LABEL: lshift_mem_b: -; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50] -; GENERIC-NEXT: shrdq $54, %rdi, %rax # sched: [2:0.67] -; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: lshift_mem_b: -; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50] -; BDVER12-NEXT: shlq $10, %rdi # sched: [1:0.50] -; BDVER12-NEXT: shrq $54, %rax # sched: [1:0.50] -; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.50] -; BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: lshift_mem_b: -; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [3:1.00] -; BTVER2-NEXT: shlq $10, %rdi # sched: [1:0.50] -; BTVER2-NEXT: shrq $54, %rax # sched: [1:0.50] -; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50] -; BTVER2-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -entry: - %a = load i64, i64* @x - %shl = shl i64 %b, 10 - %shr = lshr i64 %a, 54 - %or = or i64 %shr, %shl - store i64 %or, i64* @x - ret void -} - -define void @lshift_mem_b_optsize(i64 %b) nounwind readnone optsize { -; GENERIC-LABEL: lshift_mem_b_optsize: -; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50] -; GENERIC-NEXT: shrdq $54, %rdi, %rax # sched: [2:0.67] -; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: lshift_mem_b_optsize: -; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50] -; BDVER12-NEXT: shrdq $54, %rdi, %rax # sched: [4:3.00] -; BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: lshift_mem_b_optsize: -; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [3:1.00] -; BTVER2-NEXT: shrdq $54, %rdi, %rax # sched: [3:3.00] -; BTVER2-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -entry: - %a = load i64, i64* @x - %shl = shl i64 %b, 10 - %shr = lshr i64 %a, 54 - %or = or i64 %shr, %shl - store i64 %or, i64* @x - ret void -} - Index: test/CodeGen/X86/schedule-x86_32.ll =================================================================== --- test/CodeGen/X86/schedule-x86_32.ll +++ test/CodeGen/X86/schedule-x86_32.ll @@ -1,2601 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=i686 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define i8 @test_aaa(i8 %a0) optsize { -; GENERIC-LABEL: test_aaa: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movb {{[0-9]+}}(%esp), %al -; GENERIC-NEXT: #APP -; GENERIC-NEXT: aaa -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_aaa: -; ATOM: # %bb.0: -; ATOM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: aaa # sched: [13:6.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_aaa: -; SLM: # %bb.0: -; SLM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: aaa # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_aaa: -; SANDY: # %bb.0: -; SANDY-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: aaa # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_aaa: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: aaa # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_aaa: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: aaa # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_aaa: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: aaa # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_aaa: -; SKX: # %bb.0: -; SKX-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: aaa # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_aaa: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: aaa # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_aaa: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: aaa # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_aaa: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: aaa # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - %1 = tail call i8 asm "aaa", "=r,r"(i8 %a0) nounwind - ret i8 %1 -} - -define void @test_aad(i16 %a0) optsize { -; GENERIC-LABEL: test_aad: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: #APP -; GENERIC-NEXT: aad -; GENERIC-NEXT: aad $16 -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_aad: -; ATOM: # %bb.0: -; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: aad # sched: [7:3.50] -; ATOM-NEXT: aad $16 # sched: [7:3.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_aad: -; SLM: # %bb.0: -; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: aad # sched: [100:1.00] -; SLM-NEXT: aad $16 # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_aad: -; SANDY: # %bb.0: -; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: aad # sched: [100:0.33] -; SANDY-NEXT: aad $16 # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_aad: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: aad # sched: [100:0.25] -; HASWELL-NEXT: aad $16 # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_aad: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: aad # sched: [100:0.25] -; BROADWELL-NEXT: aad $16 # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_aad: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: aad # sched: [100:0.25] -; SKYLAKE-NEXT: aad $16 # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_aad: -; SKX: # %bb.0: -; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: aad # sched: [100:0.25] -; SKX-NEXT: aad $16 # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_aad: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: aad # sched: [100:0.50] -; BDVER2-NEXT: aad $16 # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_aad: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: aad # sched: [100:0.50] -; BTVER2-NEXT: aad $16 # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_aad: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: aad # sched: [100:0.25] -; ZNVER1-NEXT: aad $16 # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm "aad \0A\09 aad $1", "r,i"(i16 %a0, i16 16) nounwind - ret void -} - -define void @test_aam(i8 %a0) optsize { -; GENERIC-LABEL: test_aam: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movb {{[0-9]+}}(%esp), %al -; GENERIC-NEXT: #APP -; GENERIC-NEXT: aam -; GENERIC-NEXT: aam $16 -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_aam: -; ATOM: # %bb.0: -; ATOM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: aam # sched: [21:10.50] -; ATOM-NEXT: aam $16 # sched: [21:10.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_aam: -; SLM: # %bb.0: -; SLM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: aam # sched: [100:1.00] -; SLM-NEXT: aam $16 # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_aam: -; SANDY: # %bb.0: -; SANDY-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: aam # sched: [100:0.33] -; SANDY-NEXT: aam $16 # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_aam: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: aam # sched: [100:0.25] -; HASWELL-NEXT: aam $16 # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_aam: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: aam # sched: [100:0.25] -; BROADWELL-NEXT: aam $16 # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_aam: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: aam # sched: [100:0.25] -; SKYLAKE-NEXT: aam $16 # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_aam: -; SKX: # %bb.0: -; SKX-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: aam # sched: [100:0.25] -; SKX-NEXT: aam $16 # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_aam: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: aam # sched: [100:0.50] -; BDVER2-NEXT: aam $16 # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_aam: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: aam # sched: [100:0.50] -; BTVER2-NEXT: aam $16 # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_aam: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: aam # sched: [100:0.25] -; ZNVER1-NEXT: aam $16 # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm "aam \0A\09 aam $1", "r,i"(i8 %a0, i8 16) nounwind - ret void -} - -define i8 @test_aas(i8 %a0) optsize { -; GENERIC-LABEL: test_aas: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movb {{[0-9]+}}(%esp), %al -; GENERIC-NEXT: #APP -; GENERIC-NEXT: aas -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_aas: -; ATOM: # %bb.0: -; ATOM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: aas # sched: [13:6.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_aas: -; SLM: # %bb.0: -; SLM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: aas # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_aas: -; SANDY: # %bb.0: -; SANDY-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: aas # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_aas: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: aas # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_aas: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: aas # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_aas: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: aas # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_aas: -; SKX: # %bb.0: -; SKX-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: aas # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_aas: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: aas # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_aas: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: aas # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_aas: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: aas # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - %1 = tail call i8 asm "aas", "=r,r"(i8 %a0) nounwind - ret i8 %1 -} - -define void @test_arpl(i16 %a0, i16 *%a1) optsize { -; GENERIC-LABEL: test_arpl: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: arpl %ax, (%ecx) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_arpl: -; ATOM: # %bb.0: -; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: arpl %ax, (%ecx) # sched: [23:11.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_arpl: -; SLM: # %bb.0: -; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: arpl %ax, (%ecx) # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_arpl: -; SANDY: # %bb.0: -; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: arpl %ax, (%ecx) # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_arpl: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: arpl %ax, (%ecx) # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_arpl: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: arpl %ax, (%ecx) # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_arpl: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: arpl %ax, (%ecx) # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_arpl: -; SKX: # %bb.0: -; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: arpl %ax, (%ecx) # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_arpl: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: arpl %ax, (%ecx) # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_arpl: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: arpl %ax, (%ecx) # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_arpl: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: arpl %ax, (%ecx) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - call void asm sideeffect "arpl $0, $1", "r,*m"(i16 %a0, i16 *%a1) - ret void -} - -define void @test_bound(i16 %a0, i16 *%a1, i32 %a2, i32 *%a3) optsize { -; GENERIC-LABEL: test_bound: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pushl %esi -; GENERIC-NEXT: .cfi_def_cfa_offset 8 -; GENERIC-NEXT: .cfi_offset %esi, -8 -; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %esi -; GENERIC-NEXT: #APP -; GENERIC-NEXT: bound %ax, (%esi) -; GENERIC-NEXT: bound %ecx, (%edx) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: popl %esi -; GENERIC-NEXT: .cfi_def_cfa_offset 4 -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_bound: -; ATOM: # %bb.0: -; ATOM-NEXT: pushl %esi # sched: [1:1.00] -; ATOM-NEXT: .cfi_def_cfa_offset 8 -; ATOM-NEXT: .cfi_offset %esi, -8 -; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: bound %ax, (%esi) # sched: [11:5.50] -; ATOM-NEXT: bound %ecx, (%edx) # sched: [11:5.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: popl %esi # sched: [1:1.00] -; ATOM-NEXT: .cfi_def_cfa_offset 4 -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_bound: -; SLM: # %bb.0: -; SLM-NEXT: pushl %esi # sched: [1:1.00] -; SLM-NEXT: .cfi_def_cfa_offset 8 -; SLM-NEXT: .cfi_offset %esi, -8 -; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: bound %ax, (%esi) # sched: [100:1.00] -; SLM-NEXT: bound %ecx, (%edx) # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: popl %esi # sched: [3:1.00] -; SLM-NEXT: .cfi_def_cfa_offset 4 -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_bound: -; SANDY: # %bb.0: -; SANDY-NEXT: pushl %esi # sched: [5:1.00] -; SANDY-NEXT: .cfi_def_cfa_offset 8 -; SANDY-NEXT: .cfi_offset %esi, -8 -; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: bound %ax, (%esi) # sched: [100:0.33] -; SANDY-NEXT: bound %ecx, (%edx) # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: popl %esi # sched: [6:0.50] -; SANDY-NEXT: .cfi_def_cfa_offset 4 -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_bound: -; HASWELL: # %bb.0: -; HASWELL-NEXT: pushl %esi # sched: [2:1.00] -; HASWELL-NEXT: .cfi_def_cfa_offset 8 -; HASWELL-NEXT: .cfi_offset %esi, -8 -; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: bound %ax, (%esi) # sched: [1:3.75] -; HASWELL-NEXT: bound %ecx, (%edx) # sched: [1:3.75] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: popl %esi # sched: [6:0.50] -; HASWELL-NEXT: .cfi_def_cfa_offset 4 -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bound: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: pushl %esi # sched: [2:1.00] -; BROADWELL-NEXT: .cfi_def_cfa_offset 8 -; BROADWELL-NEXT: .cfi_offset %esi, -8 -; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: bound %ax, (%esi) # sched: [100:0.25] -; BROADWELL-NEXT: bound %ecx, (%edx) # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: popl %esi # sched: [6:0.50] -; BROADWELL-NEXT: .cfi_def_cfa_offset 4 -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_bound: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: pushl %esi # sched: [2:1.00] -; SKYLAKE-NEXT: .cfi_def_cfa_offset 8 -; SKYLAKE-NEXT: .cfi_offset %esi, -8 -; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: bound %ax, (%esi) # sched: [100:0.25] -; SKYLAKE-NEXT: bound %ecx, (%edx) # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: popl %esi # sched: [6:0.50] -; SKYLAKE-NEXT: .cfi_def_cfa_offset 4 -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_bound: -; SKX: # %bb.0: -; SKX-NEXT: pushl %esi # sched: [2:1.00] -; SKX-NEXT: .cfi_def_cfa_offset 8 -; SKX-NEXT: .cfi_offset %esi, -8 -; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: bound %ax, (%esi) # sched: [100:0.25] -; SKX-NEXT: bound %ecx, (%edx) # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: popl %esi # sched: [6:0.50] -; SKX-NEXT: .cfi_def_cfa_offset 4 -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_bound: -; BDVER2: # %bb.0: -; BDVER2-NEXT: pushl %esi # sched: [1:1.00] -; BDVER2-NEXT: .cfi_def_cfa_offset 8 -; BDVER2-NEXT: .cfi_offset %esi, -8 -; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: bound %ax, (%esi) # sched: [100:0.50] -; BDVER2-NEXT: bound %ecx, (%edx) # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: popl %esi # sched: [5:0.50] -; BDVER2-NEXT: .cfi_def_cfa_offset 4 -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_bound: -; BTVER2: # %bb.0: -; BTVER2-NEXT: pushl %esi # sched: [1:1.00] -; BTVER2-NEXT: .cfi_def_cfa_offset 8 -; BTVER2-NEXT: .cfi_offset %esi, -8 -; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: bound %ax, (%esi) # sched: [100:0.50] -; BTVER2-NEXT: bound %ecx, (%edx) # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: popl %esi # sched: [3:1.00] -; BTVER2-NEXT: .cfi_def_cfa_offset 4 -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_bound: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: pushl %esi # sched: [1:0.50] -; ZNVER1-NEXT: .cfi_def_cfa_offset 8 -; ZNVER1-NEXT: .cfi_offset %esi, -8 -; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: bound %ax, (%esi) # sched: [100:0.25] -; ZNVER1-NEXT: bound %ecx, (%edx) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: popl %esi # sched: [8:0.50] -; ZNVER1-NEXT: .cfi_def_cfa_offset 4 -; ZNVER1-NEXT: retl # sched: [1:0.50] - call void asm sideeffect "bound $0, $1 \0A\09 bound $2, $3", "r,*m,r,*m"(i16 %a0, i16 *%a1, i32 %a2, i32 *%a3) - ret void -} - -; TODO - test_call - -define i8 @test_daa(i8 %a0) optsize { -; GENERIC-LABEL: test_daa: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movb {{[0-9]+}}(%esp), %al -; GENERIC-NEXT: #APP -; GENERIC-NEXT: daa -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_daa: -; ATOM: # %bb.0: -; ATOM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: daa # sched: [18:9.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_daa: -; SLM: # %bb.0: -; SLM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: daa # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_daa: -; SANDY: # %bb.0: -; SANDY-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: daa # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_daa: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: daa # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_daa: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: daa # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_daa: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: daa # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_daa: -; SKX: # %bb.0: -; SKX-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: daa # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_daa: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: daa # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_daa: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: daa # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_daa: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: daa # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - %1 = tail call i8 asm "daa", "=r,r"(i8 %a0) nounwind - ret i8 %1 -} - -define i8 @test_das(i8 %a0) optsize { -; GENERIC-LABEL: test_das: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movb {{[0-9]+}}(%esp), %al -; GENERIC-NEXT: #APP -; GENERIC-NEXT: das -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_das: -; ATOM: # %bb.0: -; ATOM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: das # sched: [20:10.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_das: -; SLM: # %bb.0: -; SLM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: das # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_das: -; SANDY: # %bb.0: -; SANDY-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: das # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_das: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: das # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_das: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: das # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_das: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: das # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_das: -; SKX: # %bb.0: -; SKX-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: das # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_das: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: das # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_das: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: das # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_das: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: das # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - %1 = tail call i8 asm "das", "=r,r"(i8 %a0) nounwind - ret i8 %1 -} - -define void @test_dec16(i16 %a0, i16* %a1) optsize { -; GENERIC-LABEL: test_dec16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: decw %ax -; GENERIC-NEXT: decw (%ecx) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_dec16: -; ATOM: # %bb.0: -; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: decw %ax # sched: [1:0.50] -; ATOM-NEXT: decw (%ecx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_dec16: -; SLM: # %bb.0: -; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: decw %ax # sched: [1:0.50] -; SLM-NEXT: decw (%ecx) # sched: [5:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_dec16: -; SANDY: # %bb.0: -; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: decw %ax # sched: [1:0.33] -; SANDY-NEXT: decw (%ecx) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_dec16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: decw %ax # sched: [1:0.25] -; HASWELL-NEXT: decw (%ecx) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_dec16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: decw %ax # sched: [1:0.25] -; BROADWELL-NEXT: decw (%ecx) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_dec16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: decw %ax # sched: [1:0.25] -; SKYLAKE-NEXT: decw (%ecx) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_dec16: -; SKX: # %bb.0: -; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: decw %ax # sched: [1:0.25] -; SKX-NEXT: decw (%ecx) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_dec16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: decw %ax # sched: [1:0.50] -; BDVER2-NEXT: decw (%ecx) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_dec16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: decw %ax # sched: [1:0.50] -; BTVER2-NEXT: decw (%ecx) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_dec16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: decw %ax # sched: [1:0.25] -; ZNVER1-NEXT: decw (%ecx) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm "decw $0 \0A\09 decw $1", "r,*m"(i16 %a0, i16* %a1) nounwind - ret void -} -define void @test_dec32(i32 %a0, i32* %a1) optsize { -; GENERIC-LABEL: test_dec32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: decl %eax -; GENERIC-NEXT: decl (%ecx) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_dec32: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: decl %eax # sched: [1:0.50] -; ATOM-NEXT: decl (%ecx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_dec32: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: decl %eax # sched: [1:0.50] -; SLM-NEXT: decl (%ecx) # sched: [5:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_dec32: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: decl %eax # sched: [1:0.33] -; SANDY-NEXT: decl (%ecx) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_dec32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: decl %eax # sched: [1:0.25] -; HASWELL-NEXT: decl (%ecx) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_dec32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: decl %eax # sched: [1:0.25] -; BROADWELL-NEXT: decl (%ecx) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_dec32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: decl %eax # sched: [1:0.25] -; SKYLAKE-NEXT: decl (%ecx) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_dec32: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: decl %eax # sched: [1:0.25] -; SKX-NEXT: decl (%ecx) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_dec32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: decl %eax # sched: [1:0.50] -; BDVER2-NEXT: decl (%ecx) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_dec32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: decl %eax # sched: [1:0.50] -; BTVER2-NEXT: decl (%ecx) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_dec32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: decl %eax # sched: [1:0.25] -; ZNVER1-NEXT: decl (%ecx) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm "decl $0 \0A\09 decl $1", "r,*m"(i32 %a0, i32* %a1) nounwind - ret void -} - -define void @test_inc16(i16 %a0, i16* %a1) optsize { -; GENERIC-LABEL: test_inc16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: incw %ax -; GENERIC-NEXT: incw (%ecx) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_inc16: -; ATOM: # %bb.0: -; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: incw %ax # sched: [1:0.50] -; ATOM-NEXT: incw (%ecx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_inc16: -; SLM: # %bb.0: -; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: incw %ax # sched: [1:0.50] -; SLM-NEXT: incw (%ecx) # sched: [5:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_inc16: -; SANDY: # %bb.0: -; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: incw %ax # sched: [1:0.33] -; SANDY-NEXT: incw (%ecx) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_inc16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: incw %ax # sched: [1:0.25] -; HASWELL-NEXT: incw (%ecx) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_inc16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: incw %ax # sched: [1:0.25] -; BROADWELL-NEXT: incw (%ecx) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_inc16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: incw %ax # sched: [1:0.25] -; SKYLAKE-NEXT: incw (%ecx) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_inc16: -; SKX: # %bb.0: -; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: incw %ax # sched: [1:0.25] -; SKX-NEXT: incw (%ecx) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_inc16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: incw %ax # sched: [1:0.50] -; BDVER2-NEXT: incw (%ecx) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_inc16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: incw %ax # sched: [1:0.50] -; BTVER2-NEXT: incw (%ecx) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_inc16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: incw %ax # sched: [1:0.25] -; ZNVER1-NEXT: incw (%ecx) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm "incw $0 \0A\09 incw $1", "r,*m"(i16 %a0, i16* %a1) nounwind - ret void -} -define void @test_inc32(i32 %a0, i32* %a1) optsize { -; GENERIC-LABEL: test_inc32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: incl %eax -; GENERIC-NEXT: incl (%ecx) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_inc32: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: incl %eax # sched: [1:0.50] -; ATOM-NEXT: incl (%ecx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_inc32: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: incl %eax # sched: [1:0.50] -; SLM-NEXT: incl (%ecx) # sched: [5:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_inc32: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: incl %eax # sched: [1:0.33] -; SANDY-NEXT: incl (%ecx) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_inc32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: incl %eax # sched: [1:0.25] -; HASWELL-NEXT: incl (%ecx) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_inc32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: incl %eax # sched: [1:0.25] -; BROADWELL-NEXT: incl (%ecx) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_inc32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: incl %eax # sched: [1:0.25] -; SKYLAKE-NEXT: incl (%ecx) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_inc32: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: incl %eax # sched: [1:0.25] -; SKX-NEXT: incl (%ecx) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_inc32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: incl %eax # sched: [1:0.50] -; BDVER2-NEXT: incl (%ecx) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_inc32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: incl %eax # sched: [1:0.50] -; BTVER2-NEXT: incl (%ecx) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_inc32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: incl %eax # sched: [1:0.25] -; ZNVER1-NEXT: incl (%ecx) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm "incl $0 \0A\09 incl $1", "r,*m"(i32 %a0, i32* %a1) nounwind - ret void -} - -define void @test_into() optsize { -; GENERIC-LABEL: test_into: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: into -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_into: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: into # sched: [6:3.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_into: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: into # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_into: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: into # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_into: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: into # sched: [1:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_into: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: into # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_into: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: into # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_into: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: into # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_into: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: into # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_into: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: into # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_into: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: into # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - call void asm sideeffect "into", ""() - ret void -} - -; TODO - test_jmp - -define void @test_jcxz_jecxz() optsize { -; GENERIC-LABEL: test_jcxz_jecxz: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: JXTGT: -; GENERIC-NEXT: jcxz JXTGT -; GENERIC-NEXT: jecxz JXTGT -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_jcxz_jecxz: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: JXTGT: -; ATOM-NEXT: jcxz JXTGT # sched: [4:2.00] -; ATOM-NEXT: jecxz JXTGT # sched: [4:2.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_jcxz_jecxz: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: JXTGT: -; SLM-NEXT: jcxz JXTGT # sched: [1:1.00] -; SLM-NEXT: jecxz JXTGT # sched: [1:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_jcxz_jecxz: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: JXTGT: -; SANDY-NEXT: jcxz JXTGT # sched: [2:1.00] -; SANDY-NEXT: jecxz JXTGT # sched: [2:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_jcxz_jecxz: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: JXTGT: -; HASWELL-NEXT: jcxz JXTGT # sched: [2:0.50] -; HASWELL-NEXT: jecxz JXTGT # sched: [2:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_jcxz_jecxz: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: JXTGT: -; BROADWELL-NEXT: jcxz JXTGT # sched: [2:0.50] -; BROADWELL-NEXT: jecxz JXTGT # sched: [2:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_jcxz_jecxz: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: JXTGT: -; SKYLAKE-NEXT: jcxz JXTGT # sched: [2:0.50] -; SKYLAKE-NEXT: jecxz JXTGT # sched: [2:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_jcxz_jecxz: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: JXTGT: -; SKX-NEXT: jcxz JXTGT # sched: [2:0.50] -; SKX-NEXT: jecxz JXTGT # sched: [2:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_jcxz_jecxz: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: JXTGT: -; BDVER2-NEXT: jcxz JXTGT # sched: [1:1.00] -; BDVER2-NEXT: jecxz JXTGT # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_jcxz_jecxz: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: JXTGT: -; BTVER2-NEXT: jcxz JXTGT # sched: [1:0.50] -; BTVER2-NEXT: jecxz JXTGT # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_jcxz_jecxz: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: JXTGT: -; ZNVER1-NEXT: jcxz JXTGT # sched: [1:0.50] -; ZNVER1-NEXT: jecxz JXTGT # sched: [1:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - call void asm sideeffect "JXTGT: \0A\09 jcxz JXTGT \0A\09 jecxz JXTGT", ""() - ret void -} - -; TODO - test_lds - -define void @test_leave() optsize { -; GENERIC-LABEL: test_leave: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: leave -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_leave: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: leave # sched: [2:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_leave: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: leave # sched: [1:0.50] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_leave: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: leave # sched: [7:0.67] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_leave: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: leave # sched: [7:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_leave: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: leave # sched: [7:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_leave: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: leave # sched: [7:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_leave: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: leave # sched: [7:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_leave: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: leave # sched: [1:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_leave: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: leave # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_leave: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: leave # sched: [8:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm "leave", ""() nounwind - ret void -} - -; TODO - test_les - -define void @test_pop_push() optsize { -; GENERIC-LABEL: test_pop_push: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: popl %ds -; GENERIC-NEXT: popl %es -; GENERIC-NEXT: popl %ss -; GENERIC-NEXT: popl %fs -; GENERIC-NEXT: popl %gs -; GENERIC-NEXT: pushl %cs -; GENERIC-NEXT: pushl %ds -; GENERIC-NEXT: pushl %es -; GENERIC-NEXT: pushl %ss -; GENERIC-NEXT: pushl %fs -; GENERIC-NEXT: pushl %gs -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_pop_push: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: popl %ds # sched: [29:14.50] -; ATOM-NEXT: popl %es # sched: [29:14.50] -; ATOM-NEXT: popl %ss # sched: [48:24.00] -; ATOM-NEXT: popl %fs # sched: [29:14.50] -; ATOM-NEXT: popl %gs # sched: [29:14.50] -; ATOM-NEXT: pushl %cs # sched: [2:1.00] -; ATOM-NEXT: pushl %ds # sched: [2:1.00] -; ATOM-NEXT: pushl %es # sched: [2:1.00] -; ATOM-NEXT: pushl %ss # sched: [2:1.00] -; ATOM-NEXT: pushl %fs # sched: [2:1.00] -; ATOM-NEXT: pushl %gs # sched: [2:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_pop_push: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: popl %ds # sched: [100:1.00] -; SLM-NEXT: popl %es # sched: [100:1.00] -; SLM-NEXT: popl %ss # sched: [100:1.00] -; SLM-NEXT: popl %fs # sched: [100:1.00] -; SLM-NEXT: popl %gs # sched: [100:1.00] -; SLM-NEXT: pushl %cs # sched: [100:1.00] -; SLM-NEXT: pushl %ds # sched: [100:1.00] -; SLM-NEXT: pushl %es # sched: [100:1.00] -; SLM-NEXT: pushl %ss # sched: [100:1.00] -; SLM-NEXT: pushl %fs # sched: [100:1.00] -; SLM-NEXT: pushl %gs # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_pop_push: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: popl %ds # sched: [100:0.33] -; SANDY-NEXT: popl %es # sched: [100:0.33] -; SANDY-NEXT: popl %ss # sched: [100:0.33] -; SANDY-NEXT: popl %fs # sched: [100:0.33] -; SANDY-NEXT: popl %gs # sched: [100:0.33] -; SANDY-NEXT: pushl %cs # sched: [100:0.33] -; SANDY-NEXT: pushl %ds # sched: [100:0.33] -; SANDY-NEXT: pushl %es # sched: [100:0.33] -; SANDY-NEXT: pushl %ss # sched: [100:0.33] -; SANDY-NEXT: pushl %fs # sched: [100:0.33] -; SANDY-NEXT: pushl %gs # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_pop_push: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: popl %ds # sched: [100:0.25] -; HASWELL-NEXT: popl %es # sched: [100:0.25] -; HASWELL-NEXT: popl %ss # sched: [100:0.25] -; HASWELL-NEXT: popl %fs # sched: [100:0.25] -; HASWELL-NEXT: popl %gs # sched: [100:0.25] -; HASWELL-NEXT: pushl %cs # sched: [100:0.25] -; HASWELL-NEXT: pushl %ds # sched: [100:0.25] -; HASWELL-NEXT: pushl %es # sched: [100:0.25] -; HASWELL-NEXT: pushl %ss # sched: [100:0.25] -; HASWELL-NEXT: pushl %fs # sched: [100:0.25] -; HASWELL-NEXT: pushl %gs # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pop_push: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: popl %ds # sched: [100:0.25] -; BROADWELL-NEXT: popl %es # sched: [100:0.25] -; BROADWELL-NEXT: popl %ss # sched: [100:0.25] -; BROADWELL-NEXT: popl %fs # sched: [100:0.25] -; BROADWELL-NEXT: popl %gs # sched: [100:0.25] -; BROADWELL-NEXT: pushl %cs # sched: [100:0.25] -; BROADWELL-NEXT: pushl %ds # sched: [100:0.25] -; BROADWELL-NEXT: pushl %es # sched: [100:0.25] -; BROADWELL-NEXT: pushl %ss # sched: [100:0.25] -; BROADWELL-NEXT: pushl %fs # sched: [100:0.25] -; BROADWELL-NEXT: pushl %gs # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_pop_push: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: popl %ds # sched: [100:0.25] -; SKYLAKE-NEXT: popl %es # sched: [100:0.25] -; SKYLAKE-NEXT: popl %ss # sched: [100:0.25] -; SKYLAKE-NEXT: popl %fs # sched: [100:0.25] -; SKYLAKE-NEXT: popl %gs # sched: [100:0.25] -; SKYLAKE-NEXT: pushl %cs # sched: [100:0.25] -; SKYLAKE-NEXT: pushl %ds # sched: [100:0.25] -; SKYLAKE-NEXT: pushl %es # sched: [100:0.25] -; SKYLAKE-NEXT: pushl %ss # sched: [100:0.25] -; SKYLAKE-NEXT: pushl %fs # sched: [100:0.25] -; SKYLAKE-NEXT: pushl %gs # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_pop_push: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: popl %ds # sched: [100:0.25] -; SKX-NEXT: popl %es # sched: [100:0.25] -; SKX-NEXT: popl %ss # sched: [100:0.25] -; SKX-NEXT: popl %fs # sched: [100:0.25] -; SKX-NEXT: popl %gs # sched: [100:0.25] -; SKX-NEXT: pushl %cs # sched: [100:0.25] -; SKX-NEXT: pushl %ds # sched: [100:0.25] -; SKX-NEXT: pushl %es # sched: [100:0.25] -; SKX-NEXT: pushl %ss # sched: [100:0.25] -; SKX-NEXT: pushl %fs # sched: [100:0.25] -; SKX-NEXT: pushl %gs # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_pop_push: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: popl %ds # sched: [100:0.50] -; BDVER2-NEXT: popl %es # sched: [100:0.50] -; BDVER2-NEXT: popl %ss # sched: [100:0.50] -; BDVER2-NEXT: popl %fs # sched: [100:0.50] -; BDVER2-NEXT: popl %gs # sched: [100:0.50] -; BDVER2-NEXT: pushl %cs # sched: [100:0.50] -; BDVER2-NEXT: pushl %ds # sched: [100:0.50] -; BDVER2-NEXT: pushl %es # sched: [100:0.50] -; BDVER2-NEXT: pushl %ss # sched: [100:0.50] -; BDVER2-NEXT: pushl %fs # sched: [100:0.50] -; BDVER2-NEXT: pushl %gs # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_pop_push: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: popl %ds # sched: [100:0.50] -; BTVER2-NEXT: popl %es # sched: [100:0.50] -; BTVER2-NEXT: popl %ss # sched: [100:0.50] -; BTVER2-NEXT: popl %fs # sched: [100:0.50] -; BTVER2-NEXT: popl %gs # sched: [100:0.50] -; BTVER2-NEXT: pushl %cs # sched: [100:0.50] -; BTVER2-NEXT: pushl %ds # sched: [100:0.50] -; BTVER2-NEXT: pushl %es # sched: [100:0.50] -; BTVER2-NEXT: pushl %ss # sched: [100:0.50] -; BTVER2-NEXT: pushl %fs # sched: [100:0.50] -; BTVER2-NEXT: pushl %gs # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pop_push: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: popl %ds # sched: [100:0.25] -; ZNVER1-NEXT: popl %es # sched: [100:0.25] -; ZNVER1-NEXT: popl %ss # sched: [100:0.25] -; ZNVER1-NEXT: popl %fs # sched: [100:0.25] -; ZNVER1-NEXT: popl %gs # sched: [100:0.25] -; ZNVER1-NEXT: pushl %cs # sched: [100:0.25] -; ZNVER1-NEXT: pushl %ds # sched: [100:0.25] -; ZNVER1-NEXT: pushl %es # sched: [100:0.25] -; ZNVER1-NEXT: pushl %ss # sched: [100:0.25] -; ZNVER1-NEXT: pushl %fs # sched: [100:0.25] -; ZNVER1-NEXT: pushl %gs # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - call void asm sideeffect "pop %DS \0A\09 pop %ES \0A\09 pop %SS \0A\09 pop %FS \0A\09 pop %GS \0A\09 push %CS \0A\09 push %DS \0A\09 push %ES \0A\09 push %SS \0A\09 push %FS \0A\09 push %GS", ""() - ret void -} -define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize { -; GENERIC-LABEL: test_pop_push_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: popw %ax -; GENERIC-NEXT: popw (%ecx) -; GENERIC-NEXT: pushw %ax -; GENERIC-NEXT: pushw (%ecx) -; GENERIC-NEXT: pushw $4095 # imm = 0xFFF -; GENERIC-NEXT: pushw $7 -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_pop_push_16: -; ATOM: # %bb.0: -; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: popw %ax # sched: [2:1.00] -; ATOM-NEXT: popw (%ecx) # sched: [3:1.50] -; ATOM-NEXT: pushw %ax # sched: [1:1.00] -; ATOM-NEXT: pushw (%ecx) # sched: [2:1.00] -; ATOM-NEXT: pushw $4095 # imm = 0xFFF -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: pushw $7 # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_pop_push_16: -; SLM: # %bb.0: -; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: popw %ax # sched: [3:1.00] -; SLM-NEXT: popw (%ecx) # sched: [4:2.00] -; SLM-NEXT: pushw %ax # sched: [1:1.00] -; SLM-NEXT: pushw (%ecx) # sched: [4:2.00] -; SLM-NEXT: pushw $4095 # imm = 0xFFF -; SLM-NEXT: # sched: [1:1.00] -; SLM-NEXT: pushw $7 # sched: [1:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_pop_push_16: -; SANDY: # %bb.0: -; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: popw %ax # sched: [6:0.50] -; SANDY-NEXT: popw (%ecx) # sched: [6:0.50] -; SANDY-NEXT: pushw %ax # sched: [5:1.00] -; SANDY-NEXT: pushw (%ecx) # sched: [5:1.00] -; SANDY-NEXT: pushw $4095 # imm = 0xFFF -; SANDY-NEXT: # sched: [1:1.00] -; SANDY-NEXT: pushw $7 # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_pop_push_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: popw %ax # sched: [6:0.50] -; HASWELL-NEXT: popw (%ecx) # sched: [7:1.00] -; HASWELL-NEXT: pushw %ax # sched: [2:1.00] -; HASWELL-NEXT: pushw (%ecx) # sched: [7:1.00] -; HASWELL-NEXT: pushw $4095 # imm = 0xFFF -; HASWELL-NEXT: # sched: [1:1.00] -; HASWELL-NEXT: pushw $7 # sched: [1:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pop_push_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: popw %ax # sched: [6:0.50] -; BROADWELL-NEXT: popw (%ecx) # sched: [6:1.00] -; BROADWELL-NEXT: pushw %ax # sched: [2:1.00] -; BROADWELL-NEXT: pushw (%ecx) # sched: [6:1.00] -; BROADWELL-NEXT: pushw $4095 # imm = 0xFFF -; BROADWELL-NEXT: # sched: [1:1.00] -; BROADWELL-NEXT: pushw $7 # sched: [1:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_pop_push_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: popw %ax # sched: [6:0.50] -; SKYLAKE-NEXT: popw (%ecx) # sched: [6:1.00] -; SKYLAKE-NEXT: pushw %ax # sched: [2:1.00] -; SKYLAKE-NEXT: pushw (%ecx) # sched: [6:1.00] -; SKYLAKE-NEXT: pushw $4095 # imm = 0xFFF -; SKYLAKE-NEXT: # sched: [1:1.00] -; SKYLAKE-NEXT: pushw $7 # sched: [1:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_pop_push_16: -; SKX: # %bb.0: -; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: popw %ax # sched: [6:0.50] -; SKX-NEXT: popw (%ecx) # sched: [6:1.00] -; SKX-NEXT: pushw %ax # sched: [2:1.00] -; SKX-NEXT: pushw (%ecx) # sched: [6:1.00] -; SKX-NEXT: pushw $4095 # imm = 0xFFF -; SKX-NEXT: # sched: [1:1.00] -; SKX-NEXT: pushw $7 # sched: [1:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_pop_push_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: popw %ax # sched: [5:0.50] -; BDVER2-NEXT: popw (%ecx) # sched: [6:1.00] -; BDVER2-NEXT: pushw %ax # sched: [1:1.00] -; BDVER2-NEXT: pushw (%ecx) # sched: [6:1.00] -; BDVER2-NEXT: pushw $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: pushw $7 # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_pop_push_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: popw %ax # sched: [3:1.00] -; BTVER2-NEXT: popw (%ecx) # sched: [4:1.00] -; BTVER2-NEXT: pushw %ax # sched: [1:1.00] -; BTVER2-NEXT: pushw (%ecx) # sched: [4:1.00] -; BTVER2-NEXT: pushw $4095 # imm = 0xFFF -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: pushw $7 # sched: [1:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pop_push_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: popw %ax # sched: [8:0.50] -; ZNVER1-NEXT: popw (%ecx) # sched: [5:0.50] -; ZNVER1-NEXT: pushw %ax # sched: [1:0.50] -; ZNVER1-NEXT: pushw (%ecx) # sched: [4:0.50] -; ZNVER1-NEXT: pushw $4095 # imm = 0xFFF -; ZNVER1-NEXT: # sched: [1:0.50] -; ZNVER1-NEXT: pushw $7 # sched: [1:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - %1 = call i16 asm sideeffect "popw $0 \0A\09 popw $2 \0A\09 pushw $1 \0A\09 pushw $2 \0A\09 pushw $3 \0A\09 pushw $4", "=r,r,*m,i,i"(i16 %a0, i16 *%a1, i16 4095, i8 7) - ret i16 %1 -} -define i32 @test_pop_push_32(i32 %a0, i32 *%a1) optsize { -; GENERIC-LABEL: test_pop_push_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: popl %eax -; GENERIC-NEXT: popl (%ecx) -; GENERIC-NEXT: pushl %eax -; GENERIC-NEXT: pushl (%ecx) -; GENERIC-NEXT: pushl $4095 # imm = 0xFFF -; GENERIC-NEXT: pushl $7 -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_pop_push_32: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: popl %eax # sched: [1:1.00] -; ATOM-NEXT: popl (%ecx) # sched: [3:1.50] -; ATOM-NEXT: pushl %eax # sched: [1:1.00] -; ATOM-NEXT: pushl (%ecx) # sched: [2:1.00] -; ATOM-NEXT: pushl $4095 # imm = 0xFFF -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: pushl $7 # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_pop_push_32: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: popl %eax # sched: [3:1.00] -; SLM-NEXT: popl (%ecx) # sched: [4:2.00] -; SLM-NEXT: pushl %eax # sched: [1:1.00] -; SLM-NEXT: pushl (%ecx) # sched: [4:2.00] -; SLM-NEXT: pushl $4095 # imm = 0xFFF -; SLM-NEXT: # sched: [1:1.00] -; SLM-NEXT: pushl $7 # sched: [1:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_pop_push_32: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: popl %eax # sched: [6:0.50] -; SANDY-NEXT: popl (%ecx) # sched: [6:0.50] -; SANDY-NEXT: pushl %eax # sched: [5:1.00] -; SANDY-NEXT: pushl (%ecx) # sched: [5:1.00] -; SANDY-NEXT: pushl $4095 # imm = 0xFFF -; SANDY-NEXT: # sched: [1:1.00] -; SANDY-NEXT: pushl $7 # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_pop_push_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: popl %eax # sched: [6:0.50] -; HASWELL-NEXT: popl (%ecx) # sched: [7:1.00] -; HASWELL-NEXT: pushl %eax # sched: [2:1.00] -; HASWELL-NEXT: pushl (%ecx) # sched: [7:1.00] -; HASWELL-NEXT: pushl $4095 # imm = 0xFFF -; HASWELL-NEXT: # sched: [1:1.00] -; HASWELL-NEXT: pushl $7 # sched: [1:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pop_push_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: popl %eax # sched: [6:0.50] -; BROADWELL-NEXT: popl (%ecx) # sched: [6:1.00] -; BROADWELL-NEXT: pushl %eax # sched: [2:1.00] -; BROADWELL-NEXT: pushl (%ecx) # sched: [6:1.00] -; BROADWELL-NEXT: pushl $4095 # imm = 0xFFF -; BROADWELL-NEXT: # sched: [1:1.00] -; BROADWELL-NEXT: pushl $7 # sched: [1:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_pop_push_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: popl %eax # sched: [6:0.50] -; SKYLAKE-NEXT: popl (%ecx) # sched: [6:1.00] -; SKYLAKE-NEXT: pushl %eax # sched: [2:1.00] -; SKYLAKE-NEXT: pushl (%ecx) # sched: [6:1.00] -; SKYLAKE-NEXT: pushl $4095 # imm = 0xFFF -; SKYLAKE-NEXT: # sched: [1:1.00] -; SKYLAKE-NEXT: pushl $7 # sched: [1:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_pop_push_32: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: popl %eax # sched: [6:0.50] -; SKX-NEXT: popl (%ecx) # sched: [6:1.00] -; SKX-NEXT: pushl %eax # sched: [2:1.00] -; SKX-NEXT: pushl (%ecx) # sched: [6:1.00] -; SKX-NEXT: pushl $4095 # imm = 0xFFF -; SKX-NEXT: # sched: [1:1.00] -; SKX-NEXT: pushl $7 # sched: [1:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_pop_push_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: popl %eax # sched: [5:0.50] -; BDVER2-NEXT: popl (%ecx) # sched: [6:1.00] -; BDVER2-NEXT: pushl %eax # sched: [1:1.00] -; BDVER2-NEXT: pushl (%ecx) # sched: [6:1.00] -; BDVER2-NEXT: pushl $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: pushl $7 # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_pop_push_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: popl %eax # sched: [3:1.00] -; BTVER2-NEXT: popl (%ecx) # sched: [4:1.00] -; BTVER2-NEXT: pushl %eax # sched: [1:1.00] -; BTVER2-NEXT: pushl (%ecx) # sched: [4:1.00] -; BTVER2-NEXT: pushl $4095 # imm = 0xFFF -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: pushl $7 # sched: [1:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pop_push_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: popl %eax # sched: [8:0.50] -; ZNVER1-NEXT: popl (%ecx) # sched: [9:1.00] -; ZNVER1-NEXT: pushl %eax # sched: [1:0.50] -; ZNVER1-NEXT: pushl (%ecx) # sched: [4:0.50] -; ZNVER1-NEXT: pushl $4095 # imm = 0xFFF -; ZNVER1-NEXT: # sched: [1:0.50] -; ZNVER1-NEXT: pushl $7 # sched: [1:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - %1 = call i32 asm sideeffect "popl $0 \0A\09 popl $2 \0A\09 pushl $1 \0A\09 pushl $2 \0A\09 pushl $3 \0A\09 pushl $4", "=r,r,*m,i,i"(i32 %a0, i32 *%a1, i32 4095, i8 7) - ret i32 %1 -} - -define void @test_popa_popf_pusha_pushf() optsize { -; GENERIC-LABEL: test_popa_popf_pusha_pushf: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: popal -; GENERIC-NEXT: popfl -; GENERIC-NEXT: pushal -; GENERIC-NEXT: pushfl -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_popa_popf_pusha_pushf: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: popal # sched: [9:4.50] -; ATOM-NEXT: popfl # sched: [26:13.00] -; ATOM-NEXT: pushal # sched: [8:4.00] -; ATOM-NEXT: pushfl # sched: [9:4.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_popa_popf_pusha_pushf: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: popal # sched: [3:1.00] -; SLM-NEXT: popfl # sched: [3:1.00] -; SLM-NEXT: pushal # sched: [1:1.00] -; SLM-NEXT: pushfl # sched: [1:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_popa_popf_pusha_pushf: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: popal # sched: [5:0.50] -; SANDY-NEXT: popfl # sched: [5:0.50] -; SANDY-NEXT: pushal # sched: [1:1.00] -; SANDY-NEXT: pushfl # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_popa_popf_pusha_pushf: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: popal # sched: [1:4.50] -; HASWELL-NEXT: popfl # sched: [5:0.50] -; HASWELL-NEXT: pushal # sched: [1:4.75] -; HASWELL-NEXT: pushfl # sched: [1:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_popa_popf_pusha_pushf: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: popal # sched: [5:0.50] -; BROADWELL-NEXT: popfl # sched: [5:0.50] -; BROADWELL-NEXT: pushal # sched: [1:1.00] -; BROADWELL-NEXT: pushfl # sched: [1:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_popa_popf_pusha_pushf: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: popal # sched: [5:0.50] -; SKYLAKE-NEXT: popfl # sched: [5:0.50] -; SKYLAKE-NEXT: pushal # sched: [1:1.00] -; SKYLAKE-NEXT: pushfl # sched: [1:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_popa_popf_pusha_pushf: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: popal # sched: [5:0.50] -; SKX-NEXT: popfl # sched: [5:0.50] -; SKX-NEXT: pushal # sched: [1:1.00] -; SKX-NEXT: pushfl # sched: [1:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_popa_popf_pusha_pushf: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: popal # sched: [5:0.50] -; BDVER2-NEXT: popfl # sched: [5:0.50] -; BDVER2-NEXT: pushal # sched: [1:1.00] -; BDVER2-NEXT: pushfl # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_popa_popf_pusha_pushf: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: popal # sched: [3:1.00] -; BTVER2-NEXT: popfl # sched: [3:1.00] -; BTVER2-NEXT: pushal # sched: [1:1.00] -; BTVER2-NEXT: pushfl # sched: [1:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_popa_popf_pusha_pushf: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: popal # sched: [100:0.25] -; ZNVER1-NEXT: popfl # sched: [100:0.25] -; ZNVER1-NEXT: pushal # sched: [8:0.50] -; ZNVER1-NEXT: pushfl # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - call void asm sideeffect "popa \0A\09 popf \0A\09 pusha \0A\09 pushf", ""() - ret void -} - -define void @test_ret() optsize { -; GENERIC-LABEL: test_ret: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: retl -; GENERIC-NEXT: retl $4095 # imm = 0xFFF -; GENERIC-NEXT: lretl -; GENERIC-NEXT: lretl $4095 # imm = 0xFFF -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_ret: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: retl # sched: [79:39.50] -; ATOM-NEXT: retl $4095 # imm = 0xFFF -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: lretl # sched: [79:39.50] -; ATOM-NEXT: lretl $4095 # imm = 0xFFF -; ATOM-NEXT: # sched: [79:39.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_ret: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: retl # sched: [4:1.00] -; SLM-NEXT: retl $4095 # imm = 0xFFF -; SLM-NEXT: # sched: [4:1.00] -; SLM-NEXT: lretl # sched: [4:1.00] -; SLM-NEXT: lretl $4095 # imm = 0xFFF -; SLM-NEXT: # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_ret: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: retl # sched: [6:1.00] -; SANDY-NEXT: retl $4095 # imm = 0xFFF -; SANDY-NEXT: # sched: [6:1.00] -; SANDY-NEXT: lretl # sched: [6:1.00] -; SANDY-NEXT: lretl $4095 # imm = 0xFFF -; SANDY-NEXT: # sched: [6:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_ret: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; HASWELL-NEXT: retl $4095 # imm = 0xFFF -; HASWELL-NEXT: # sched: [1:2.00] -; HASWELL-NEXT: lretl # sched: [6:0.50] -; HASWELL-NEXT: lretl $4095 # imm = 0xFFF -; HASWELL-NEXT: # sched: [1:2.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ret: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; BROADWELL-NEXT: retl $4095 # imm = 0xFFF -; BROADWELL-NEXT: # sched: [6:0.50] -; BROADWELL-NEXT: lretl # sched: [6:0.50] -; BROADWELL-NEXT: lretl $4095 # imm = 0xFFF -; BROADWELL-NEXT: # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_ret: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; SKYLAKE-NEXT: retl $4095 # imm = 0xFFF -; SKYLAKE-NEXT: # sched: [6:0.50] -; SKYLAKE-NEXT: lretl # sched: [6:0.50] -; SKYLAKE-NEXT: lretl $4095 # imm = 0xFFF -; SKYLAKE-NEXT: # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_ret: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: retl # sched: [6:0.50] -; SKX-NEXT: retl $4095 # imm = 0xFFF -; SKX-NEXT: # sched: [6:0.50] -; SKX-NEXT: lretl # sched: [6:0.50] -; SKX-NEXT: lretl $4095 # imm = 0xFFF -; SKX-NEXT: # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_ret: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; BDVER2-NEXT: retl $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [5:1.00] -; BDVER2-NEXT: lretl # sched: [5:1.00] -; BDVER2-NEXT: lretl $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_ret: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; BTVER2-NEXT: retl $4095 # imm = 0xFFF -; BTVER2-NEXT: # sched: [4:1.00] -; BTVER2-NEXT: lretl # sched: [4:1.00] -; BTVER2-NEXT: lretl $4095 # imm = 0xFFF -; BTVER2-NEXT: # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ret: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: retl # sched: [1:0.50] -; ZNVER1-NEXT: retl $4095 # imm = 0xFFF -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: lretl # sched: [1:0.50] -; ZNVER1-NEXT: lretl $4095 # imm = 0xFFF -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - call void asm sideeffect "ret \0A\09 ret $0 \0A\09 lret \0A\09 lret $0", "i"(i16 4095) - ret void -} - -define i8 @test_salc() optsize { -; GENERIC-LABEL: test_salc: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: salc -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_salc: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: salc # sched: [1:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_salc: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: salc # sched: [1:0.50] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_salc: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: salc # sched: [1:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_salc: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: salc # sched: [1:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_salc: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: salc # sched: [1:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_salc: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: salc # sched: [1:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_salc: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: salc # sched: [1:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_salc: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: salc # sched: [1:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_salc: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: salc # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_salc: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: salc # sched: [1:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - %1 = tail call i8 asm "salc", "=r"() nounwind - ret i8 %1 -} - -; TODO - test_sgdt -; TODO - test_sidt - -define void @test_xchg_32(i32 %a0, i32 %a1, i32 *%a2) optsize { -; GENERIC-LABEL: test_xchg_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: xchgl %eax, %eax -; GENERIC-NEXT: xchgl %ecx, %eax -; GENERIC-NEXT: xchgl %eax, (%edx) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_xchg_32: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: xchgl %eax, %eax # sched: [2:1.00] -; ATOM-NEXT: xchgl %ecx, %eax # sched: [2:1.00] -; ATOM-NEXT: xchgl %eax, (%edx) # sched: [3:1.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_xchg_32: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: xchgl %eax, %eax # sched: [1:0.50] -; SLM-NEXT: xchgl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: xchgl %eax, (%edx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_xchg_32: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: xchgl %eax, %eax # sched: [2:1.00] -; SANDY-NEXT: xchgl %ecx, %eax # sched: [2:1.00] -; SANDY-NEXT: xchgl %eax, (%edx) # sched: [6:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_xchg_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: xchgl %eax, %eax # sched: [2:0.75] -; HASWELL-NEXT: xchgl %ecx, %eax # sched: [2:0.75] -; HASWELL-NEXT: xchgl %eax, (%edx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xchg_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: xchgl %eax, %eax # sched: [2:0.75] -; BROADWELL-NEXT: xchgl %ecx, %eax # sched: [2:0.75] -; BROADWELL-NEXT: xchgl %eax, (%edx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_xchg_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: xchgl %eax, %eax # sched: [2:0.75] -; SKYLAKE-NEXT: xchgl %ecx, %eax # sched: [2:0.75] -; SKYLAKE-NEXT: xchgl %eax, (%edx) # sched: [10:1.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_xchg_32: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: xchgl %eax, %eax # sched: [2:0.75] -; SKX-NEXT: xchgl %ecx, %eax # sched: [2:0.75] -; SKX-NEXT: xchgl %eax, (%edx) # sched: [10:1.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_xchg_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: xchgl %eax, %eax # sched: [1:1.00] -; BDVER2-NEXT: xchgl %ecx, %eax # sched: [1:1.00] -; BDVER2-NEXT: xchgl %eax, (%edx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_xchg_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: xchgl %eax, %eax # sched: [1:0.50] -; BTVER2-NEXT: xchgl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: xchgl %eax, (%edx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xchg_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: xchgl %eax, %eax # sched: [1:0.50] -; ZNVER1-NEXT: xchgl %ecx, %eax # sched: [1:0.50] -; ZNVER1-NEXT: xchgl %eax, (%edx) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm "xchg %EAX, $0 \0A\09 xchg $1, $0 \0A\09 xchg $2, $0", "r,r,*m"(i32 %a0, i32 %a1, i32 *%a2) nounwind - ret void -} Index: test/CodeGen/X86/schedule-x86_64.ll =================================================================== --- test/CodeGen/X86/schedule-x86_64.ll +++ test/CodeGen/X86/schedule-x86_64.ll @@ -1,18893 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define void @test_adc_8(i8 %a0, i8* %a1, i8 %a2) optsize { -; GENERIC-LABEL: test_adc_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: adcb $7, %al # sched: [2:0.67] -; GENERIC-NEXT: adcb $7, %dil # sched: [2:0.67] -; GENERIC-NEXT: adcb $7, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: adcb %dl, %dil # sched: [2:0.67] -; GENERIC-NEXT: adcb %dil, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: adcb (%rsi), %dil # sched: [7:0.67] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_adc_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: adcb $7, %al # sched: [1:0.50] -; ATOM-NEXT: adcb $7, %dil # sched: [1:0.50] -; ATOM-NEXT: adcb $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: adcb %dl, %dil # sched: [1:0.50] -; ATOM-NEXT: adcb %dil, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: adcb (%rsi), %dil # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_adc_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: adcb $7, %al # sched: [1:0.50] -; SLM-NEXT: adcb $7, %dil # sched: [1:0.50] -; SLM-NEXT: adcb $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: adcb %dl, %dil # sched: [1:0.50] -; SLM-NEXT: adcb %dil, (%rsi) # sched: [5:2.00] -; SLM-NEXT: adcb (%rsi), %dil # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_adc_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: adcb $7, %al # sched: [2:0.67] -; SANDY-NEXT: adcb $7, %dil # sched: [2:0.67] -; SANDY-NEXT: adcb $7, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: adcb %dl, %dil # sched: [2:0.67] -; SANDY-NEXT: adcb %dil, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: adcb (%rsi), %dil # sched: [7:0.67] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_adc_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: adcb $7, %al # sched: [2:0.50] -; HASWELL-NEXT: adcb $7, %dil # sched: [2:0.50] -; HASWELL-NEXT: adcb $7, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: adcb %dl, %dil # sched: [2:0.50] -; HASWELL-NEXT: adcb %dil, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: adcb (%rsi), %dil # sched: [7:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_adc_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: adcb $7, %al # sched: [2:0.50] -; BROADWELL-NEXT: adcb $7, %dil # sched: [2:0.50] -; BROADWELL-NEXT: adcb $7, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: adcb %dl, %dil # sched: [1:0.50] -; BROADWELL-NEXT: adcb %dil, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: adcb (%rsi), %dil # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_adc_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: adcb $7, %al # sched: [2:0.50] -; SKYLAKE-NEXT: adcb $7, %dil # sched: [2:0.50] -; SKYLAKE-NEXT: adcb $7, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: adcb %dl, %dil # sched: [1:0.50] -; SKYLAKE-NEXT: adcb %dil, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: adcb (%rsi), %dil # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_adc_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: adcb $7, %al # sched: [2:0.50] -; SKX-NEXT: adcb $7, %dil # sched: [2:0.50] -; SKX-NEXT: adcb $7, (%rsi) # sched: [8:1.00] -; SKX-NEXT: adcb %dl, %dil # sched: [1:0.50] -; SKX-NEXT: adcb %dil, (%rsi) # sched: [8:1.00] -; SKX-NEXT: adcb (%rsi), %dil # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_adc_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: adcb $7, %al # sched: [1:1.00] -; BDVER2-NEXT: adcb $7, %dil # sched: [1:1.00] -; BDVER2-NEXT: adcb $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: adcb %dl, %dil # sched: [1:1.00] -; BDVER2-NEXT: adcb %dil, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: adcb (%rsi), %dil # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_adc_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: adcb $7, %al # sched: [1:1.00] -; BTVER2-NEXT: adcb $7, %dil # sched: [1:1.00] -; BTVER2-NEXT: adcb $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: adcb %dl, %dil # sched: [1:1.00] -; BTVER2-NEXT: adcb %dil, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: adcb (%rsi), %dil # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_adc_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: adcb $7, %al # sched: [1:0.25] -; ZNVER1-NEXT: adcb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: adcb $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: adcb %dl, %dil # sched: [1:0.25] -; ZNVER1-NEXT: adcb %dil, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: adcb (%rsi), %dil # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "adcb $3, %AL \0A\09 adcb $3, $0 \0A\09 adcb $3, $2 \0A\09 adcb $1, $0 \0A\09 adcb $0, $2 \0A\09 adcb $2, $0", "r,r,*m,i"(i8 %a0, i8 %a2, i8* %a1, i8 7) nounwind - ret void -} -define void @test_adc_16(i16 %a0, i16* %a1, i16 %a2) optsize { -; GENERIC-LABEL: test_adc_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: adcw $511, %ax # imm = 0x1FF -; GENERIC-NEXT: # sched: [2:0.67] -; GENERIC-NEXT: adcw $511, %di # imm = 0x1FF -; GENERIC-NEXT: # sched: [2:0.67] -; GENERIC-NEXT: adcw $511, (%rsi) # imm = 0x1FF -; GENERIC-NEXT: # sched: [9:1.00] -; GENERIC-NEXT: adcw $7, %di # sched: [2:0.67] -; GENERIC-NEXT: adcw $7, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: adcw %dx, %di # sched: [2:0.67] -; GENERIC-NEXT: adcw %di, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: adcw (%rsi), %di # sched: [7:0.67] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_adc_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: adcw $511, %ax # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: adcw $511, %di # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: adcw $511, (%rsi) # imm = 0x1FF -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: adcw $7, %di # sched: [1:0.50] -; ATOM-NEXT: adcw $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: adcw %dx, %di # sched: [1:0.50] -; ATOM-NEXT: adcw %di, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: adcw (%rsi), %di # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_adc_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: adcw $511, %ax # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: adcw $511, %di # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: adcw $511, (%rsi) # imm = 0x1FF -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: adcw $7, %di # sched: [1:0.50] -; SLM-NEXT: adcw $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: adcw %dx, %di # sched: [1:0.50] -; SLM-NEXT: adcw %di, (%rsi) # sched: [5:2.00] -; SLM-NEXT: adcw (%rsi), %di # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_adc_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: adcw $511, %ax # imm = 0x1FF -; SANDY-NEXT: # sched: [2:0.67] -; SANDY-NEXT: adcw $511, %di # imm = 0x1FF -; SANDY-NEXT: # sched: [2:0.67] -; SANDY-NEXT: adcw $511, (%rsi) # imm = 0x1FF -; SANDY-NEXT: # sched: [9:1.00] -; SANDY-NEXT: adcw $7, %di # sched: [2:0.67] -; SANDY-NEXT: adcw $7, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: adcw %dx, %di # sched: [2:0.67] -; SANDY-NEXT: adcw %di, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: adcw (%rsi), %di # sched: [7:0.67] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_adc_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: adcw $511, %ax # imm = 0x1FF -; HASWELL-NEXT: # sched: [2:0.50] -; HASWELL-NEXT: adcw $511, %di # imm = 0x1FF -; HASWELL-NEXT: # sched: [2:0.50] -; HASWELL-NEXT: adcw $511, (%rsi) # imm = 0x1FF -; HASWELL-NEXT: # sched: [9:1.00] -; HASWELL-NEXT: adcw $7, %di # sched: [2:0.50] -; HASWELL-NEXT: adcw $7, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: adcw %dx, %di # sched: [2:0.50] -; HASWELL-NEXT: adcw %di, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: adcw (%rsi), %di # sched: [7:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_adc_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: adcw $511, %ax # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.50] -; BROADWELL-NEXT: adcw $511, %di # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.50] -; BROADWELL-NEXT: adcw $511, (%rsi) # imm = 0x1FF -; BROADWELL-NEXT: # sched: [8:1.00] -; BROADWELL-NEXT: adcw $7, %di # sched: [1:0.50] -; BROADWELL-NEXT: adcw $7, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: adcw %dx, %di # sched: [1:0.50] -; BROADWELL-NEXT: adcw %di, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: adcw (%rsi), %di # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_adc_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: adcw $511, %ax # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.50] -; SKYLAKE-NEXT: adcw $511, %di # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.50] -; SKYLAKE-NEXT: adcw $511, (%rsi) # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [8:1.00] -; SKYLAKE-NEXT: adcw $7, %di # sched: [1:0.50] -; SKYLAKE-NEXT: adcw $7, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: adcw %dx, %di # sched: [1:0.50] -; SKYLAKE-NEXT: adcw %di, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: adcw (%rsi), %di # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_adc_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: adcw $511, %ax # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.50] -; SKX-NEXT: adcw $511, %di # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.50] -; SKX-NEXT: adcw $511, (%rsi) # imm = 0x1FF -; SKX-NEXT: # sched: [8:1.00] -; SKX-NEXT: adcw $7, %di # sched: [1:0.50] -; SKX-NEXT: adcw $7, (%rsi) # sched: [8:1.00] -; SKX-NEXT: adcw %dx, %di # sched: [1:0.50] -; SKX-NEXT: adcw %di, (%rsi) # sched: [8:1.00] -; SKX-NEXT: adcw (%rsi), %di # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_adc_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: adcw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: adcw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: adcw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: adcw $7, %di # sched: [1:1.00] -; BDVER2-NEXT: adcw $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: adcw %dx, %di # sched: [1:1.00] -; BDVER2-NEXT: adcw %di, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: adcw (%rsi), %di # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_adc_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: adcw $511, %ax # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: adcw $511, %di # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: adcw $511, (%rsi) # imm = 0x1FF -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: adcw $7, %di # sched: [1:1.00] -; BTVER2-NEXT: adcw $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: adcw %dx, %di # sched: [1:1.00] -; BTVER2-NEXT: adcw %di, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: adcw (%rsi), %di # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_adc_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: adcw $511, %ax # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: adcw $511, %di # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: adcw $511, (%rsi) # imm = 0x1FF -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: adcw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: adcw $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: adcw %dx, %di # sched: [1:0.25] -; ZNVER1-NEXT: adcw %di, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: adcw (%rsi), %di # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "adcw $3, %AX \0A\09 adcw $3, $0 \0A\09 adcw $3, $2 \0A\09 adcw $4, $0 \0A\09 adcw $4, $2 \0A\09 adcw $1, $0 \0A\09 adcw $0, $2 \0A\09 adcw $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind - ret void -} -define void @test_adc_32(i32 %a0, i32* %a1, i32 %a2) optsize { -; GENERIC-LABEL: test_adc_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: adcl $665536, %eax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [2:0.67] -; GENERIC-NEXT: adcl $665536, %edi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [2:0.67] -; GENERIC-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [9:1.00] -; GENERIC-NEXT: adcl $7, %edi # sched: [2:0.67] -; GENERIC-NEXT: adcl $7, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: adcl %edx, %edi # sched: [2:0.67] -; GENERIC-NEXT: adcl %edi, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: adcl (%rsi), %edi # sched: [7:0.67] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_adc_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: adcl $665536, %eax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: adcl $665536, %edi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: adcl $7, %edi # sched: [1:0.50] -; ATOM-NEXT: adcl $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: adcl %edx, %edi # sched: [1:0.50] -; ATOM-NEXT: adcl %edi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: adcl (%rsi), %edi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_adc_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: adcl $665536, %eax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: adcl $665536, %edi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: adcl $7, %edi # sched: [1:0.50] -; SLM-NEXT: adcl $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: adcl %edx, %edi # sched: [1:0.50] -; SLM-NEXT: adcl %edi, (%rsi) # sched: [5:2.00] -; SLM-NEXT: adcl (%rsi), %edi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_adc_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: adcl $665536, %eax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [2:0.67] -; SANDY-NEXT: adcl $665536, %edi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [2:0.67] -; SANDY-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [9:1.00] -; SANDY-NEXT: adcl $7, %edi # sched: [2:0.67] -; SANDY-NEXT: adcl $7, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: adcl %edx, %edi # sched: [2:0.67] -; SANDY-NEXT: adcl %edi, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: adcl (%rsi), %edi # sched: [7:0.67] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_adc_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: adcl $665536, %eax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [2:0.50] -; HASWELL-NEXT: adcl $665536, %edi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [2:0.50] -; HASWELL-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [9:1.00] -; HASWELL-NEXT: adcl $7, %edi # sched: [2:0.50] -; HASWELL-NEXT: adcl $7, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: adcl %edx, %edi # sched: [2:0.50] -; HASWELL-NEXT: adcl %edi, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: adcl (%rsi), %edi # sched: [7:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_adc_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: adcl $665536, %eax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.50] -; BROADWELL-NEXT: adcl $665536, %edi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.50] -; BROADWELL-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [8:1.00] -; BROADWELL-NEXT: adcl $7, %edi # sched: [1:0.50] -; BROADWELL-NEXT: adcl $7, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: adcl %edx, %edi # sched: [1:0.50] -; BROADWELL-NEXT: adcl %edi, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: adcl (%rsi), %edi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_adc_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: adcl $665536, %eax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.50] -; SKYLAKE-NEXT: adcl $665536, %edi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.50] -; SKYLAKE-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [8:1.00] -; SKYLAKE-NEXT: adcl $7, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: adcl $7, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: adcl %edx, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: adcl %edi, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: adcl (%rsi), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_adc_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: adcl $665536, %eax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.50] -; SKX-NEXT: adcl $665536, %edi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.50] -; SKX-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [8:1.00] -; SKX-NEXT: adcl $7, %edi # sched: [1:0.50] -; SKX-NEXT: adcl $7, (%rsi) # sched: [8:1.00] -; SKX-NEXT: adcl %edx, %edi # sched: [1:0.50] -; SKX-NEXT: adcl %edi, (%rsi) # sched: [8:1.00] -; SKX-NEXT: adcl (%rsi), %edi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_adc_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: adcl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: adcl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: adcl $7, %edi # sched: [1:1.00] -; BDVER2-NEXT: adcl $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: adcl %edx, %edi # sched: [1:1.00] -; BDVER2-NEXT: adcl %edi, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: adcl (%rsi), %edi # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_adc_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: adcl $665536, %eax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: adcl $665536, %edi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: adcl $7, %edi # sched: [1:1.00] -; BTVER2-NEXT: adcl $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: adcl %edx, %edi # sched: [1:1.00] -; BTVER2-NEXT: adcl %edi, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: adcl (%rsi), %edi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_adc_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: adcl $665536, %eax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: adcl $665536, %edi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: adcl $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: adcl $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: adcl %edx, %edi # sched: [1:0.25] -; ZNVER1-NEXT: adcl %edi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: adcl (%rsi), %edi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "adcl $3, %EAX \0A\09 adcl $3, $0 \0A\09 adcl $3, $2 \0A\09 adcl $4, $0 \0A\09 adcl $4, $2 \0A\09 adcl $1, $0 \0A\09 adcl $0, $2 \0A\09 adcl $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind - ret void -} -define void @test_adc_64(i64 %a0, i64* %a1, i64 %a2) optsize { -; GENERIC-LABEL: test_adc_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: adcq $665536, %rax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [2:0.67] -; GENERIC-NEXT: adcq $665536, %rdi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [2:0.67] -; GENERIC-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [9:1.00] -; GENERIC-NEXT: adcq $7, %rdi # sched: [2:0.67] -; GENERIC-NEXT: adcq $7, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: adcq %rdx, %rdi # sched: [2:0.67] -; GENERIC-NEXT: adcq %rdi, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: adcq (%rsi), %rdi # sched: [7:0.67] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_adc_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: adcq $665536, %rax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: adcq $665536, %rdi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: adcq $7, %rdi # sched: [1:0.50] -; ATOM-NEXT: adcq $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: adcq %rdx, %rdi # sched: [1:0.50] -; ATOM-NEXT: adcq %rdi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: adcq (%rsi), %rdi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_adc_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: adcq $665536, %rax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: adcq $665536, %rdi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: adcq $7, %rdi # sched: [1:0.50] -; SLM-NEXT: adcq $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: adcq %rdx, %rdi # sched: [1:0.50] -; SLM-NEXT: adcq %rdi, (%rsi) # sched: [5:2.00] -; SLM-NEXT: adcq (%rsi), %rdi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_adc_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: adcq $665536, %rax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [2:0.67] -; SANDY-NEXT: adcq $665536, %rdi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [2:0.67] -; SANDY-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [9:1.00] -; SANDY-NEXT: adcq $7, %rdi # sched: [2:0.67] -; SANDY-NEXT: adcq $7, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: adcq %rdx, %rdi # sched: [2:0.67] -; SANDY-NEXT: adcq %rdi, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: adcq (%rsi), %rdi # sched: [7:0.67] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_adc_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: adcq $665536, %rax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [2:0.50] -; HASWELL-NEXT: adcq $665536, %rdi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [2:0.50] -; HASWELL-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [9:1.00] -; HASWELL-NEXT: adcq $7, %rdi # sched: [2:0.50] -; HASWELL-NEXT: adcq $7, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: adcq %rdx, %rdi # sched: [2:0.50] -; HASWELL-NEXT: adcq %rdi, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: adcq (%rsi), %rdi # sched: [7:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_adc_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: adcq $665536, %rax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.50] -; BROADWELL-NEXT: adcq $665536, %rdi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.50] -; BROADWELL-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [8:1.00] -; BROADWELL-NEXT: adcq $7, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: adcq $7, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: adcq %rdx, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: adcq %rdi, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: adcq (%rsi), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_adc_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: adcq $665536, %rax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.50] -; SKYLAKE-NEXT: adcq $665536, %rdi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.50] -; SKYLAKE-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [8:1.00] -; SKYLAKE-NEXT: adcq $7, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: adcq $7, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: adcq %rdx, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: adcq %rdi, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: adcq (%rsi), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_adc_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: adcq $665536, %rax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.50] -; SKX-NEXT: adcq $665536, %rdi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.50] -; SKX-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [8:1.00] -; SKX-NEXT: adcq $7, %rdi # sched: [1:0.50] -; SKX-NEXT: adcq $7, (%rsi) # sched: [8:1.00] -; SKX-NEXT: adcq %rdx, %rdi # sched: [1:0.50] -; SKX-NEXT: adcq %rdi, (%rsi) # sched: [8:1.00] -; SKX-NEXT: adcq (%rsi), %rdi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_adc_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: adcq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: adcq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: adcq $7, %rdi # sched: [1:1.00] -; BDVER2-NEXT: adcq $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: adcq %rdx, %rdi # sched: [1:1.00] -; BDVER2-NEXT: adcq %rdi, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: adcq (%rsi), %rdi # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_adc_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: adcq $665536, %rax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: adcq $665536, %rdi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: adcq $7, %rdi # sched: [1:1.00] -; BTVER2-NEXT: adcq $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: adcq %rdx, %rdi # sched: [1:1.00] -; BTVER2-NEXT: adcq %rdi, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: adcq (%rsi), %rdi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_adc_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: adcq $665536, %rax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: adcq $665536, %rdi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: adcq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: adcq $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: adcq %rdx, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: adcq %rdi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: adcq (%rsi), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "adcq $3, %RAX \0A\09 adcq $3, $0 \0A\09 adcq $3, $2 \0A\09 adcq $4, $0 \0A\09 adcq $4, $2 \0A\09 adcq $1, $0 \0A\09 adcq $0, $2 \0A\09 adcq $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind - ret void -} - -define void @test_add_8(i8 %a0, i8* %a1, i8 %a2) optsize { -; GENERIC-LABEL: test_add_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: addb $7, %al # sched: [1:0.33] -; GENERIC-NEXT: addb $7, %dil # sched: [1:0.33] -; GENERIC-NEXT: addb $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: addb %dl, %dil # sched: [1:0.33] -; GENERIC-NEXT: addb %dil, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: addb (%rsi), %dil # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_add_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: addb $7, %al # sched: [1:0.50] -; ATOM-NEXT: addb $7, %dil # sched: [1:0.50] -; ATOM-NEXT: addb $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: addb %dl, %dil # sched: [1:0.50] -; ATOM-NEXT: addb %dil, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: addb (%rsi), %dil # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_add_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: addb $7, %al # sched: [1:0.50] -; SLM-NEXT: addb $7, %dil # sched: [1:0.50] -; SLM-NEXT: addb $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: addb %dl, %dil # sched: [1:0.50] -; SLM-NEXT: addb %dil, (%rsi) # sched: [5:2.00] -; SLM-NEXT: addb (%rsi), %dil # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_add_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: addb $7, %al # sched: [1:0.33] -; SANDY-NEXT: addb $7, %dil # sched: [1:0.33] -; SANDY-NEXT: addb $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: addb %dl, %dil # sched: [1:0.33] -; SANDY-NEXT: addb %dil, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: addb (%rsi), %dil # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_add_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: addb $7, %al # sched: [1:0.25] -; HASWELL-NEXT: addb $7, %dil # sched: [1:0.25] -; HASWELL-NEXT: addb $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: addb %dl, %dil # sched: [1:0.25] -; HASWELL-NEXT: addb %dil, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: addb (%rsi), %dil # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_add_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: addb $7, %al # sched: [1:0.25] -; BROADWELL-NEXT: addb $7, %dil # sched: [1:0.25] -; BROADWELL-NEXT: addb $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: addb %dl, %dil # sched: [1:0.25] -; BROADWELL-NEXT: addb %dil, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: addb (%rsi), %dil # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_add_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: addb $7, %al # sched: [1:0.25] -; SKYLAKE-NEXT: addb $7, %dil # sched: [1:0.25] -; SKYLAKE-NEXT: addb $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: addb %dl, %dil # sched: [1:0.25] -; SKYLAKE-NEXT: addb %dil, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: addb (%rsi), %dil # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_add_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: addb $7, %al # sched: [1:0.25] -; SKX-NEXT: addb $7, %dil # sched: [1:0.25] -; SKX-NEXT: addb $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: addb %dl, %dil # sched: [1:0.25] -; SKX-NEXT: addb %dil, (%rsi) # sched: [7:1.00] -; SKX-NEXT: addb (%rsi), %dil # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_add_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: addb $7, %al # sched: [1:0.50] -; BDVER2-NEXT: addb $7, %dil # sched: [1:0.50] -; BDVER2-NEXT: addb $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: addb %dl, %dil # sched: [1:0.50] -; BDVER2-NEXT: addb %dil, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: addb (%rsi), %dil # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_add_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: addb $7, %al # sched: [1:0.50] -; BTVER2-NEXT: addb $7, %dil # sched: [1:0.50] -; BTVER2-NEXT: addb $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: addb %dl, %dil # sched: [1:0.50] -; BTVER2-NEXT: addb %dil, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: addb (%rsi), %dil # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_add_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: addb $7, %al # sched: [1:0.25] -; ZNVER1-NEXT: addb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: addb $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: addb %dl, %dil # sched: [1:0.25] -; ZNVER1-NEXT: addb %dil, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: addb (%rsi), %dil # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "addb $3, %AL \0A\09 addb $3, $0 \0A\09 addb $3, $2 \0A\09 addb $1, $0 \0A\09 addb $0, $2 \0A\09 addb $2, $0", "r,r,*m,i"(i8 %a0, i8 %a2, i8* %a1, i8 7) nounwind - ret void -} -define void @test_add_16(i16 %a0, i16* %a1, i16 %a2) optsize { -; GENERIC-LABEL: test_add_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: addw $511, %ax # imm = 0x1FF -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: addw $511, %di # imm = 0x1FF -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: addw $511, (%rsi) # imm = 0x1FF -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: addw $7, %di # sched: [1:0.33] -; GENERIC-NEXT: addw $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: addw %dx, %di # sched: [1:0.33] -; GENERIC-NEXT: addw %di, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: addw (%rsi), %di # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_add_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: addw $511, %ax # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: addw $511, %di # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: addw $511, (%rsi) # imm = 0x1FF -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: addw $7, %di # sched: [1:0.50] -; ATOM-NEXT: addw $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: addw %dx, %di # sched: [1:0.50] -; ATOM-NEXT: addw %di, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: addw (%rsi), %di # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_add_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: addw $511, %ax # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: addw $511, %di # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: addw $511, (%rsi) # imm = 0x1FF -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: addw $7, %di # sched: [1:0.50] -; SLM-NEXT: addw $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: addw %dx, %di # sched: [1:0.50] -; SLM-NEXT: addw %di, (%rsi) # sched: [5:2.00] -; SLM-NEXT: addw (%rsi), %di # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_add_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: addw $511, %ax # imm = 0x1FF -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: addw $511, %di # imm = 0x1FF -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: addw $511, (%rsi) # imm = 0x1FF -; SANDY-NEXT: # sched: [7:1.00] -; SANDY-NEXT: addw $7, %di # sched: [1:0.33] -; SANDY-NEXT: addw $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: addw %dx, %di # sched: [1:0.33] -; SANDY-NEXT: addw %di, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: addw (%rsi), %di # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_add_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: addw $511, %ax # imm = 0x1FF -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: addw $511, %di # imm = 0x1FF -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: addw $511, (%rsi) # imm = 0x1FF -; HASWELL-NEXT: # sched: [7:1.00] -; HASWELL-NEXT: addw $7, %di # sched: [1:0.25] -; HASWELL-NEXT: addw $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: addw %dx, %di # sched: [1:0.25] -; HASWELL-NEXT: addw %di, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: addw (%rsi), %di # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_add_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: addw $511, %ax # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: addw $511, %di # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: addw $511, (%rsi) # imm = 0x1FF -; BROADWELL-NEXT: # sched: [7:1.00] -; BROADWELL-NEXT: addw $7, %di # sched: [1:0.25] -; BROADWELL-NEXT: addw $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: addw %dx, %di # sched: [1:0.25] -; BROADWELL-NEXT: addw %di, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: addw (%rsi), %di # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_add_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: addw $511, %ax # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: addw $511, %di # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: addw $511, (%rsi) # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [7:1.00] -; SKYLAKE-NEXT: addw $7, %di # sched: [1:0.25] -; SKYLAKE-NEXT: addw $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: addw %dx, %di # sched: [1:0.25] -; SKYLAKE-NEXT: addw %di, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: addw (%rsi), %di # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_add_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: addw $511, %ax # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: addw $511, %di # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: addw $511, (%rsi) # imm = 0x1FF -; SKX-NEXT: # sched: [7:1.00] -; SKX-NEXT: addw $7, %di # sched: [1:0.25] -; SKX-NEXT: addw $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: addw %dx, %di # sched: [1:0.25] -; SKX-NEXT: addw %di, (%rsi) # sched: [7:1.00] -; SKX-NEXT: addw (%rsi), %di # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_add_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: addw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: addw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: addw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: addw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: addw $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: addw %dx, %di # sched: [1:0.50] -; BDVER2-NEXT: addw %di, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: addw (%rsi), %di # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_add_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: addw $511, %ax # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: addw $511, %di # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: addw $511, (%rsi) # imm = 0x1FF -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: addw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: addw $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: addw %dx, %di # sched: [1:0.50] -; BTVER2-NEXT: addw %di, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: addw (%rsi), %di # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_add_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: addw $511, %ax # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: addw $511, %di # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: addw $511, (%rsi) # imm = 0x1FF -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: addw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: addw $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: addw %dx, %di # sched: [1:0.25] -; ZNVER1-NEXT: addw %di, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: addw (%rsi), %di # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "addw $3, %AX \0A\09 addw $3, $0 \0A\09 addw $3, $2 \0A\09 addw $4, $0 \0A\09 addw $4, $2 \0A\09 addw $1, $0 \0A\09 addw $0, $2 \0A\09 addw $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind - ret void -} -define void @test_add_32(i32 %a0, i32* %a1, i32 %a2) optsize { -; GENERIC-LABEL: test_add_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: addl $665536, %eax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: addl $665536, %edi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: addl $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: addl $7, %edi # sched: [1:0.33] -; GENERIC-NEXT: addl $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: addl %edx, %edi # sched: [1:0.33] -; GENERIC-NEXT: addl %edi, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: addl (%rsi), %edi # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_add_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: addl $665536, %eax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: addl $665536, %edi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: addl $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: addl $7, %edi # sched: [1:0.50] -; ATOM-NEXT: addl $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: addl %edx, %edi # sched: [1:0.50] -; ATOM-NEXT: addl %edi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: addl (%rsi), %edi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_add_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: addl $665536, %eax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: addl $665536, %edi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: addl $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: addl $7, %edi # sched: [1:0.50] -; SLM-NEXT: addl $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: addl %edx, %edi # sched: [1:0.50] -; SLM-NEXT: addl %edi, (%rsi) # sched: [5:2.00] -; SLM-NEXT: addl (%rsi), %edi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_add_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: addl $665536, %eax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: addl $665536, %edi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: addl $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [7:1.00] -; SANDY-NEXT: addl $7, %edi # sched: [1:0.33] -; SANDY-NEXT: addl $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: addl %edx, %edi # sched: [1:0.33] -; SANDY-NEXT: addl %edi, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: addl (%rsi), %edi # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_add_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: addl $665536, %eax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: addl $665536, %edi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: addl $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [7:1.00] -; HASWELL-NEXT: addl $7, %edi # sched: [1:0.25] -; HASWELL-NEXT: addl $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: addl %edx, %edi # sched: [1:0.25] -; HASWELL-NEXT: addl %edi, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: addl (%rsi), %edi # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_add_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: addl $665536, %eax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: addl $665536, %edi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: addl $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [7:1.00] -; BROADWELL-NEXT: addl $7, %edi # sched: [1:0.25] -; BROADWELL-NEXT: addl $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: addl %edx, %edi # sched: [1:0.25] -; BROADWELL-NEXT: addl %edi, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: addl (%rsi), %edi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_add_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: addl $665536, %eax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: addl $665536, %edi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: addl $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [7:1.00] -; SKYLAKE-NEXT: addl $7, %edi # sched: [1:0.25] -; SKYLAKE-NEXT: addl $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: addl %edx, %edi # sched: [1:0.25] -; SKYLAKE-NEXT: addl %edi, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: addl (%rsi), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_add_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: addl $665536, %eax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: addl $665536, %edi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: addl $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [7:1.00] -; SKX-NEXT: addl $7, %edi # sched: [1:0.25] -; SKX-NEXT: addl $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: addl %edx, %edi # sched: [1:0.25] -; SKX-NEXT: addl %edi, (%rsi) # sched: [7:1.00] -; SKX-NEXT: addl (%rsi), %edi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_add_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: addl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: addl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: addl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: addl $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: addl $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: addl %edx, %edi # sched: [1:0.50] -; BDVER2-NEXT: addl %edi, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: addl (%rsi), %edi # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_add_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: addl $665536, %eax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: addl $665536, %edi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: addl $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: addl $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: addl $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: addl %edx, %edi # sched: [1:0.50] -; BTVER2-NEXT: addl %edi, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: addl (%rsi), %edi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_add_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: addl $665536, %eax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: addl $665536, %edi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: addl $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: addl $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: addl $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: addl %edx, %edi # sched: [1:0.25] -; ZNVER1-NEXT: addl %edi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: addl (%rsi), %edi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "addl $3, %EAX \0A\09 addl $3, $0 \0A\09 addl $3, $2 \0A\09 addl $4, $0 \0A\09 addl $4, $2 \0A\09 addl $1, $0 \0A\09 addl $0, $2 \0A\09 addl $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind - ret void -} -define void @test_add_64(i64 %a0, i64* %a1, i64 %a2) optsize { -; GENERIC-LABEL: test_add_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: addq $665536, %rax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: addq $665536, %rdi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: addq $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: addq $7, %rdi # sched: [1:0.33] -; GENERIC-NEXT: addq $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: addq %rdx, %rdi # sched: [1:0.33] -; GENERIC-NEXT: addq %rdi, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: addq (%rsi), %rdi # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_add_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: addq $665536, %rax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: addq $665536, %rdi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: addq $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: addq $7, %rdi # sched: [1:0.50] -; ATOM-NEXT: addq $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: addq %rdx, %rdi # sched: [1:0.50] -; ATOM-NEXT: addq %rdi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: addq (%rsi), %rdi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_add_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: addq $665536, %rax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: addq $665536, %rdi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: addq $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: addq $7, %rdi # sched: [1:0.50] -; SLM-NEXT: addq $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: addq %rdx, %rdi # sched: [1:0.50] -; SLM-NEXT: addq %rdi, (%rsi) # sched: [5:2.00] -; SLM-NEXT: addq (%rsi), %rdi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_add_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: addq $665536, %rax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: addq $665536, %rdi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: addq $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [7:1.00] -; SANDY-NEXT: addq $7, %rdi # sched: [1:0.33] -; SANDY-NEXT: addq $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: addq %rdx, %rdi # sched: [1:0.33] -; SANDY-NEXT: addq %rdi, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: addq (%rsi), %rdi # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_add_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: addq $665536, %rax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: addq $665536, %rdi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: addq $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [7:1.00] -; HASWELL-NEXT: addq $7, %rdi # sched: [1:0.25] -; HASWELL-NEXT: addq $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: addq %rdx, %rdi # sched: [1:0.25] -; HASWELL-NEXT: addq %rdi, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: addq (%rsi), %rdi # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_add_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: addq $665536, %rax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: addq $665536, %rdi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: addq $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [7:1.00] -; BROADWELL-NEXT: addq $7, %rdi # sched: [1:0.25] -; BROADWELL-NEXT: addq $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: addq %rdx, %rdi # sched: [1:0.25] -; BROADWELL-NEXT: addq %rdi, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: addq (%rsi), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_add_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: addq $665536, %rax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: addq $665536, %rdi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: addq $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [7:1.00] -; SKYLAKE-NEXT: addq $7, %rdi # sched: [1:0.25] -; SKYLAKE-NEXT: addq $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: addq %rdx, %rdi # sched: [1:0.25] -; SKYLAKE-NEXT: addq %rdi, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: addq (%rsi), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_add_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: addq $665536, %rax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: addq $665536, %rdi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: addq $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [7:1.00] -; SKX-NEXT: addq $7, %rdi # sched: [1:0.25] -; SKX-NEXT: addq $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: addq %rdx, %rdi # sched: [1:0.25] -; SKX-NEXT: addq %rdi, (%rsi) # sched: [7:1.00] -; SKX-NEXT: addq (%rsi), %rdi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_add_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: addq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: addq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: addq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: addq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: addq $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: addq %rdx, %rdi # sched: [1:0.50] -; BDVER2-NEXT: addq %rdi, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: addq (%rsi), %rdi # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_add_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: addq $665536, %rax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: addq $665536, %rdi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: addq $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: addq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: addq $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: addq %rdx, %rdi # sched: [1:0.50] -; BTVER2-NEXT: addq %rdi, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: addq (%rsi), %rdi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_add_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: addq $665536, %rax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: addq $665536, %rdi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: addq $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: addq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: addq $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: addq %rdx, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: addq %rdi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: addq (%rsi), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "addq $3, %RAX \0A\09 addq $3, $0 \0A\09 addq $3, $2 \0A\09 addq $4, $0 \0A\09 addq $4, $2 \0A\09 addq $1, $0 \0A\09 addq $0, $2 \0A\09 addq $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind - ret void -} - -define void @test_and_8(i8 %a0, i8* %a1, i8 %a2) optsize { -; GENERIC-LABEL: test_and_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: andb $7, %al # sched: [1:0.33] -; GENERIC-NEXT: andb $7, %dil # sched: [1:0.33] -; GENERIC-NEXT: andb $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: andb %dl, %dil # sched: [1:0.33] -; GENERIC-NEXT: andb %dil, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: andb (%rsi), %dil # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_and_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: andb $7, %al # sched: [1:0.50] -; ATOM-NEXT: andb $7, %dil # sched: [1:0.50] -; ATOM-NEXT: andb $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: andb %dl, %dil # sched: [1:0.50] -; ATOM-NEXT: andb %dil, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: andb (%rsi), %dil # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_and_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: andb $7, %al # sched: [1:0.50] -; SLM-NEXT: andb $7, %dil # sched: [1:0.50] -; SLM-NEXT: andb $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: andb %dl, %dil # sched: [1:0.50] -; SLM-NEXT: andb %dil, (%rsi) # sched: [5:2.00] -; SLM-NEXT: andb (%rsi), %dil # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_and_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: andb $7, %al # sched: [1:0.33] -; SANDY-NEXT: andb $7, %dil # sched: [1:0.33] -; SANDY-NEXT: andb $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: andb %dl, %dil # sched: [1:0.33] -; SANDY-NEXT: andb %dil, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: andb (%rsi), %dil # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_and_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: andb $7, %al # sched: [1:0.25] -; HASWELL-NEXT: andb $7, %dil # sched: [1:0.25] -; HASWELL-NEXT: andb $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: andb %dl, %dil # sched: [1:0.25] -; HASWELL-NEXT: andb %dil, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: andb (%rsi), %dil # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_and_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: andb $7, %al # sched: [1:0.25] -; BROADWELL-NEXT: andb $7, %dil # sched: [1:0.25] -; BROADWELL-NEXT: andb $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: andb %dl, %dil # sched: [1:0.25] -; BROADWELL-NEXT: andb %dil, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: andb (%rsi), %dil # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_and_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: andb $7, %al # sched: [1:0.25] -; SKYLAKE-NEXT: andb $7, %dil # sched: [1:0.25] -; SKYLAKE-NEXT: andb $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: andb %dl, %dil # sched: [1:0.25] -; SKYLAKE-NEXT: andb %dil, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: andb (%rsi), %dil # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_and_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: andb $7, %al # sched: [1:0.25] -; SKX-NEXT: andb $7, %dil # sched: [1:0.25] -; SKX-NEXT: andb $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: andb %dl, %dil # sched: [1:0.25] -; SKX-NEXT: andb %dil, (%rsi) # sched: [7:1.00] -; SKX-NEXT: andb (%rsi), %dil # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_and_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: andb $7, %al # sched: [1:0.50] -; BDVER2-NEXT: andb $7, %dil # sched: [1:0.50] -; BDVER2-NEXT: andb $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: andb %dl, %dil # sched: [1:0.50] -; BDVER2-NEXT: andb %dil, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: andb (%rsi), %dil # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_and_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: andb $7, %al # sched: [1:0.50] -; BTVER2-NEXT: andb $7, %dil # sched: [1:0.50] -; BTVER2-NEXT: andb $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: andb %dl, %dil # sched: [1:0.50] -; BTVER2-NEXT: andb %dil, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: andb (%rsi), %dil # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_and_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: andb $7, %al # sched: [1:0.25] -; ZNVER1-NEXT: andb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: andb $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: andb %dl, %dil # sched: [1:0.25] -; ZNVER1-NEXT: andb %dil, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: andb (%rsi), %dil # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "andb $3, %AL \0A\09 andb $3, $0 \0A\09 andb $3, $2 \0A\09 andb $1, $0 \0A\09 andb $0, $2 \0A\09 andb $2, $0", "r,r,*m,i"(i8 %a0, i8 %a2, i8* %a1, i8 7) nounwind - ret void -} -define void @test_and_16(i16 %a0, i16* %a1, i16 %a2) optsize { -; GENERIC-LABEL: test_and_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: andw $511, %ax # imm = 0x1FF -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: andw $511, %di # imm = 0x1FF -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: andw $511, (%rsi) # imm = 0x1FF -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: andw $7, %di # sched: [1:0.33] -; GENERIC-NEXT: andw $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: andw %dx, %di # sched: [1:0.33] -; GENERIC-NEXT: andw %di, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: andw (%rsi), %di # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_and_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: andw $511, %ax # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: andw $511, %di # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: andw $511, (%rsi) # imm = 0x1FF -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: andw $7, %di # sched: [1:0.50] -; ATOM-NEXT: andw $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: andw %dx, %di # sched: [1:0.50] -; ATOM-NEXT: andw %di, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: andw (%rsi), %di # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_and_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: andw $511, %ax # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: andw $511, %di # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: andw $511, (%rsi) # imm = 0x1FF -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: andw $7, %di # sched: [1:0.50] -; SLM-NEXT: andw $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: andw %dx, %di # sched: [1:0.50] -; SLM-NEXT: andw %di, (%rsi) # sched: [5:2.00] -; SLM-NEXT: andw (%rsi), %di # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_and_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: andw $511, %ax # imm = 0x1FF -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: andw $511, %di # imm = 0x1FF -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: andw $511, (%rsi) # imm = 0x1FF -; SANDY-NEXT: # sched: [7:1.00] -; SANDY-NEXT: andw $7, %di # sched: [1:0.33] -; SANDY-NEXT: andw $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: andw %dx, %di # sched: [1:0.33] -; SANDY-NEXT: andw %di, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: andw (%rsi), %di # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_and_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: andw $511, %ax # imm = 0x1FF -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: andw $511, %di # imm = 0x1FF -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: andw $511, (%rsi) # imm = 0x1FF -; HASWELL-NEXT: # sched: [7:1.00] -; HASWELL-NEXT: andw $7, %di # sched: [1:0.25] -; HASWELL-NEXT: andw $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: andw %dx, %di # sched: [1:0.25] -; HASWELL-NEXT: andw %di, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: andw (%rsi), %di # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_and_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: andw $511, %ax # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: andw $511, %di # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: andw $511, (%rsi) # imm = 0x1FF -; BROADWELL-NEXT: # sched: [7:1.00] -; BROADWELL-NEXT: andw $7, %di # sched: [1:0.25] -; BROADWELL-NEXT: andw $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: andw %dx, %di # sched: [1:0.25] -; BROADWELL-NEXT: andw %di, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: andw (%rsi), %di # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_and_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: andw $511, %ax # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: andw $511, %di # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: andw $511, (%rsi) # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [7:1.00] -; SKYLAKE-NEXT: andw $7, %di # sched: [1:0.25] -; SKYLAKE-NEXT: andw $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: andw %dx, %di # sched: [1:0.25] -; SKYLAKE-NEXT: andw %di, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: andw (%rsi), %di # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_and_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: andw $511, %ax # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: andw $511, %di # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: andw $511, (%rsi) # imm = 0x1FF -; SKX-NEXT: # sched: [7:1.00] -; SKX-NEXT: andw $7, %di # sched: [1:0.25] -; SKX-NEXT: andw $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: andw %dx, %di # sched: [1:0.25] -; SKX-NEXT: andw %di, (%rsi) # sched: [7:1.00] -; SKX-NEXT: andw (%rsi), %di # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_and_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: andw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: andw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: andw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: andw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: andw $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: andw %dx, %di # sched: [1:0.50] -; BDVER2-NEXT: andw %di, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: andw (%rsi), %di # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_and_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: andw $511, %ax # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: andw $511, %di # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: andw $511, (%rsi) # imm = 0x1FF -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: andw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: andw $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: andw %dx, %di # sched: [1:0.50] -; BTVER2-NEXT: andw %di, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: andw (%rsi), %di # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_and_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: andw $511, %ax # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: andw $511, %di # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: andw $511, (%rsi) # imm = 0x1FF -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: andw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: andw $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: andw %dx, %di # sched: [1:0.25] -; ZNVER1-NEXT: andw %di, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: andw (%rsi), %di # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "andw $3, %AX \0A\09 andw $3, $0 \0A\09 andw $3, $2 \0A\09 andw $4, $0 \0A\09 andw $4, $2 \0A\09 andw $1, $0 \0A\09 andw $0, $2 \0A\09 andw $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind - ret void -} -define void @test_and_32(i32 %a0, i32* %a1, i32 %a2) optsize { -; GENERIC-LABEL: test_and_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: andl $665536, %eax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: andl $665536, %edi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: andl $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: andl $7, %edi # sched: [1:0.33] -; GENERIC-NEXT: andl $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: andl %edx, %edi # sched: [1:0.33] -; GENERIC-NEXT: andl %edi, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: andl (%rsi), %edi # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_and_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: andl $665536, %eax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: andl $665536, %edi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: andl $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: andl $7, %edi # sched: [1:0.50] -; ATOM-NEXT: andl $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: andl %edx, %edi # sched: [1:0.50] -; ATOM-NEXT: andl %edi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: andl (%rsi), %edi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_and_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: andl $665536, %eax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: andl $665536, %edi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: andl $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: andl $7, %edi # sched: [1:0.50] -; SLM-NEXT: andl $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: andl %edx, %edi # sched: [1:0.50] -; SLM-NEXT: andl %edi, (%rsi) # sched: [5:2.00] -; SLM-NEXT: andl (%rsi), %edi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_and_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: andl $665536, %eax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: andl $665536, %edi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: andl $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [7:1.00] -; SANDY-NEXT: andl $7, %edi # sched: [1:0.33] -; SANDY-NEXT: andl $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: andl %edx, %edi # sched: [1:0.33] -; SANDY-NEXT: andl %edi, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: andl (%rsi), %edi # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_and_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: andl $665536, %eax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: andl $665536, %edi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: andl $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [7:1.00] -; HASWELL-NEXT: andl $7, %edi # sched: [1:0.25] -; HASWELL-NEXT: andl $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: andl %edx, %edi # sched: [1:0.25] -; HASWELL-NEXT: andl %edi, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: andl (%rsi), %edi # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_and_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: andl $665536, %eax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: andl $665536, %edi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: andl $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [7:1.00] -; BROADWELL-NEXT: andl $7, %edi # sched: [1:0.25] -; BROADWELL-NEXT: andl $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: andl %edx, %edi # sched: [1:0.25] -; BROADWELL-NEXT: andl %edi, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: andl (%rsi), %edi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_and_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: andl $665536, %eax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: andl $665536, %edi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: andl $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [7:1.00] -; SKYLAKE-NEXT: andl $7, %edi # sched: [1:0.25] -; SKYLAKE-NEXT: andl $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: andl %edx, %edi # sched: [1:0.25] -; SKYLAKE-NEXT: andl %edi, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: andl (%rsi), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_and_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: andl $665536, %eax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: andl $665536, %edi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: andl $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [7:1.00] -; SKX-NEXT: andl $7, %edi # sched: [1:0.25] -; SKX-NEXT: andl $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: andl %edx, %edi # sched: [1:0.25] -; SKX-NEXT: andl %edi, (%rsi) # sched: [7:1.00] -; SKX-NEXT: andl (%rsi), %edi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_and_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: andl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: andl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: andl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: andl $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: andl $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: andl %edx, %edi # sched: [1:0.50] -; BDVER2-NEXT: andl %edi, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: andl (%rsi), %edi # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_and_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: andl $665536, %eax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: andl $665536, %edi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: andl $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: andl $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: andl $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: andl %edx, %edi # sched: [1:0.50] -; BTVER2-NEXT: andl %edi, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: andl (%rsi), %edi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_and_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: andl $665536, %eax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: andl $665536, %edi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: andl $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: andl $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: andl $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: andl %edx, %edi # sched: [1:0.25] -; ZNVER1-NEXT: andl %edi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: andl (%rsi), %edi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "andl $3, %EAX \0A\09 andl $3, $0 \0A\09 andl $3, $2 \0A\09 andl $4, $0 \0A\09 andl $4, $2 \0A\09 andl $1, $0 \0A\09 andl $0, $2 \0A\09 andl $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind - ret void -} -define void @test_and_64(i64 %a0, i64* %a1, i64 %a2) optsize { -; GENERIC-LABEL: test_and_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: andq $665536, %rax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: andq $665536, %rdi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: andq $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: andq $7, %rdi # sched: [1:0.33] -; GENERIC-NEXT: andq $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: andq %rdx, %rdi # sched: [1:0.33] -; GENERIC-NEXT: andq %rdi, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: andq (%rsi), %rdi # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_and_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: andq $665536, %rax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: andq $665536, %rdi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: andq $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: andq $7, %rdi # sched: [1:0.50] -; ATOM-NEXT: andq $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: andq %rdx, %rdi # sched: [1:0.50] -; ATOM-NEXT: andq %rdi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: andq (%rsi), %rdi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_and_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: andq $665536, %rax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: andq $665536, %rdi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: andq $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: andq $7, %rdi # sched: [1:0.50] -; SLM-NEXT: andq $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: andq %rdx, %rdi # sched: [1:0.50] -; SLM-NEXT: andq %rdi, (%rsi) # sched: [5:2.00] -; SLM-NEXT: andq (%rsi), %rdi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_and_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: andq $665536, %rax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: andq $665536, %rdi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: andq $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [7:1.00] -; SANDY-NEXT: andq $7, %rdi # sched: [1:0.33] -; SANDY-NEXT: andq $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: andq %rdx, %rdi # sched: [1:0.33] -; SANDY-NEXT: andq %rdi, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: andq (%rsi), %rdi # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_and_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: andq $665536, %rax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: andq $665536, %rdi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: andq $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [7:1.00] -; HASWELL-NEXT: andq $7, %rdi # sched: [1:0.25] -; HASWELL-NEXT: andq $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: andq %rdx, %rdi # sched: [1:0.25] -; HASWELL-NEXT: andq %rdi, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: andq (%rsi), %rdi # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_and_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: andq $665536, %rax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: andq $665536, %rdi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: andq $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [7:1.00] -; BROADWELL-NEXT: andq $7, %rdi # sched: [1:0.25] -; BROADWELL-NEXT: andq $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: andq %rdx, %rdi # sched: [1:0.25] -; BROADWELL-NEXT: andq %rdi, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: andq (%rsi), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_and_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: andq $665536, %rax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: andq $665536, %rdi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: andq $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [7:1.00] -; SKYLAKE-NEXT: andq $7, %rdi # sched: [1:0.25] -; SKYLAKE-NEXT: andq $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: andq %rdx, %rdi # sched: [1:0.25] -; SKYLAKE-NEXT: andq %rdi, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: andq (%rsi), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_and_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: andq $665536, %rax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: andq $665536, %rdi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: andq $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [7:1.00] -; SKX-NEXT: andq $7, %rdi # sched: [1:0.25] -; SKX-NEXT: andq $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: andq %rdx, %rdi # sched: [1:0.25] -; SKX-NEXT: andq %rdi, (%rsi) # sched: [7:1.00] -; SKX-NEXT: andq (%rsi), %rdi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_and_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: andq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: andq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: andq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: andq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: andq $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: andq %rdx, %rdi # sched: [1:0.50] -; BDVER2-NEXT: andq %rdi, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: andq (%rsi), %rdi # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_and_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: andq $665536, %rax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: andq $665536, %rdi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: andq $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: andq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: andq $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: andq %rdx, %rdi # sched: [1:0.50] -; BTVER2-NEXT: andq %rdi, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: andq (%rsi), %rdi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_and_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: andq $665536, %rax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: andq $665536, %rdi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: andq $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: andq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: andq $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: andq %rdx, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: andq %rdi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: andq (%rsi), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "andq $3, %RAX \0A\09 andq $3, $0 \0A\09 andq $3, $2 \0A\09 andq $4, $0 \0A\09 andq $4, $2 \0A\09 andq $1, $0 \0A\09 andq $0, $2 \0A\09 andq $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind - ret void -} - -define i16 @test_bsf16(i16 %a0, i16* %a1) optsize { -; GENERIC-LABEL: test_bsf16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: bsfw %di, %ax # sched: [3:1.00] -; GENERIC-NEXT: bsfw (%rsi), %cx # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_bsf16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: bsfw %di, %ax # sched: [16:8.00] -; ATOM-NEXT: bsfw (%rsi), %cx # sched: [16:8.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: orl %ecx, %eax # sched: [1:0.50] -; ATOM-NEXT: # kill: def $ax killed $ax killed $eax -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_bsf16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: bsfw %di, %ax # sched: [10:10.00] -; SLM-NEXT: bsfw (%rsi), %cx # sched: [13:10.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: # kill: def $ax killed $ax killed $eax -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_bsf16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: bsfw %di, %ax # sched: [3:1.00] -; SANDY-NEXT: bsfw (%rsi), %cx # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: # kill: def $ax killed $ax killed $eax -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_bsf16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: bsfw %di, %ax # sched: [3:1.00] -; HASWELL-NEXT: bsfw (%rsi), %cx # sched: [8:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bsf16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: bsfw %di, %ax # sched: [3:1.00] -; BROADWELL-NEXT: bsfw (%rsi), %cx # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_bsf16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: bsfw %di, %ax # sched: [3:1.00] -; SKYLAKE-NEXT: bsfw (%rsi), %cx # sched: [8:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_bsf16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: bsfw %di, %ax # sched: [3:1.00] -; SKX-NEXT: bsfw (%rsi), %cx # sched: [8:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_bsf16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: bsfw %di, %ax # sched: [3:2.00] -; BDVER2-NEXT: bsfw (%rsi), %cx # sched: [7:2.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_bsf16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: bsfw %di, %ax # sched: [4:4.00] -; BTVER2-NEXT: bsfw (%rsi), %cx # sched: [7:4.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_bsf16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: bsfw %di, %ax # sched: [3:0.25] -; ZNVER1-NEXT: bsfw (%rsi), %cx # sched: [7:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: # kill: def $ax killed $ax killed $eax -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call { i16, i16 } asm sideeffect "bsf $2, $0 \0A\09 bsf $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i16 %a0, i16* %a1) - %2 = extractvalue { i16, i16 } %1, 0 - %3 = extractvalue { i16, i16 } %1, 1 - %4 = or i16 %2, %3 - ret i16 %4 -} -define i32 @test_bsf32(i32 %a0, i32* %a1) optsize { -; GENERIC-LABEL: test_bsf32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: bsfl %edi, %eax # sched: [3:1.00] -; GENERIC-NEXT: bsfl (%rsi), %ecx # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_bsf32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: bsfl %edi, %eax # sched: [16:8.00] -; ATOM-NEXT: bsfl (%rsi), %ecx # sched: [16:8.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: orl %ecx, %eax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_bsf32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: bsfl %edi, %eax # sched: [10:10.00] -; SLM-NEXT: bsfl (%rsi), %ecx # sched: [13:10.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_bsf32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: bsfl %edi, %eax # sched: [3:1.00] -; SANDY-NEXT: bsfl (%rsi), %ecx # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_bsf32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: bsfl %edi, %eax # sched: [3:1.00] -; HASWELL-NEXT: bsfl (%rsi), %ecx # sched: [8:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bsf32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: bsfl %edi, %eax # sched: [3:1.00] -; BROADWELL-NEXT: bsfl (%rsi), %ecx # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_bsf32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: bsfl %edi, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: bsfl (%rsi), %ecx # sched: [8:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_bsf32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: bsfl %edi, %eax # sched: [3:1.00] -; SKX-NEXT: bsfl (%rsi), %ecx # sched: [8:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_bsf32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: bsfl %edi, %eax # sched: [3:2.00] -; BDVER2-NEXT: bsfl (%rsi), %ecx # sched: [7:2.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_bsf32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: bsfl %edi, %eax # sched: [4:4.00] -; BTVER2-NEXT: bsfl (%rsi), %ecx # sched: [7:4.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_bsf32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: bsfl %edi, %eax # sched: [3:0.25] -; ZNVER1-NEXT: bsfl (%rsi), %ecx # sched: [7:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call { i32, i32 } asm sideeffect "bsf $2, $0 \0A\09 bsf $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i32 %a0, i32* %a1) - %2 = extractvalue { i32, i32 } %1, 0 - %3 = extractvalue { i32, i32 } %1, 1 - %4 = or i32 %2, %3 - ret i32 %4 -} -define i64 @test_bsf64(i64 %a0, i64* %a1) optsize { -; GENERIC-LABEL: test_bsf64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: bsfq %rdi, %rax # sched: [3:1.00] -; GENERIC-NEXT: bsfq (%rsi), %rcx # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_bsf64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: bsfq %rdi, %rax # sched: [16:8.00] -; ATOM-NEXT: bsfq (%rsi), %rcx # sched: [16:8.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: orq %rcx, %rax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_bsf64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: bsfq %rdi, %rax # sched: [10:10.00] -; SLM-NEXT: bsfq (%rsi), %rcx # sched: [13:10.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: orq %rcx, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_bsf64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: bsfq %rdi, %rax # sched: [3:1.00] -; SANDY-NEXT: bsfq (%rsi), %rcx # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: orq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_bsf64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: bsfq %rdi, %rax # sched: [3:1.00] -; HASWELL-NEXT: bsfq (%rsi), %rcx # sched: [8:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bsf64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: bsfq %rdi, %rax # sched: [3:1.00] -; BROADWELL-NEXT: bsfq (%rsi), %rcx # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_bsf64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: bsfq %rdi, %rax # sched: [3:1.00] -; SKYLAKE-NEXT: bsfq (%rsi), %rcx # sched: [8:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_bsf64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: bsfq %rdi, %rax # sched: [3:1.00] -; SKX-NEXT: bsfq (%rsi), %rcx # sched: [8:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: orq %rcx, %rax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_bsf64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: bsfq %rdi, %rax # sched: [3:2.00] -; BDVER2-NEXT: bsfq (%rsi), %rcx # sched: [7:2.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_bsf64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: bsfq %rdi, %rax # sched: [4:4.00] -; BTVER2-NEXT: bsfq (%rsi), %rcx # sched: [7:4.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_bsf64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: bsfq %rdi, %rax # sched: [3:0.25] -; ZNVER1-NEXT: bsfq (%rsi), %rcx # sched: [7:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call { i64, i64 } asm sideeffect "bsf $2, $0 \0A\09 bsf $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i64 %a0, i64* %a1) - %2 = extractvalue { i64, i64 } %1, 0 - %3 = extractvalue { i64, i64 } %1, 1 - %4 = or i64 %2, %3 - ret i64 %4 -} - -define i16 @test_bsr16(i16 %a0, i16* %a1) optsize { -; GENERIC-LABEL: test_bsr16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: bsrw %di, %ax # sched: [3:1.00] -; GENERIC-NEXT: bsrw (%rsi), %cx # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_bsr16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: bsrw %di, %ax # sched: [16:8.00] -; ATOM-NEXT: bsrw (%rsi), %cx # sched: [16:8.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: orl %ecx, %eax # sched: [1:0.50] -; ATOM-NEXT: # kill: def $ax killed $ax killed $eax -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_bsr16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: bsrw %di, %ax # sched: [10:10.00] -; SLM-NEXT: bsrw (%rsi), %cx # sched: [13:10.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: # kill: def $ax killed $ax killed $eax -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_bsr16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: bsrw %di, %ax # sched: [3:1.00] -; SANDY-NEXT: bsrw (%rsi), %cx # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: # kill: def $ax killed $ax killed $eax -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_bsr16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: bsrw %di, %ax # sched: [3:1.00] -; HASWELL-NEXT: bsrw (%rsi), %cx # sched: [8:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bsr16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: bsrw %di, %ax # sched: [3:1.00] -; BROADWELL-NEXT: bsrw (%rsi), %cx # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_bsr16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: bsrw %di, %ax # sched: [3:1.00] -; SKYLAKE-NEXT: bsrw (%rsi), %cx # sched: [8:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_bsr16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: bsrw %di, %ax # sched: [3:1.00] -; SKX-NEXT: bsrw (%rsi), %cx # sched: [8:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_bsr16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: bsrw %di, %ax # sched: [4:2.00] -; BDVER2-NEXT: bsrw (%rsi), %cx # sched: [8:2.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_bsr16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: bsrw %di, %ax # sched: [5:4.00] -; BTVER2-NEXT: bsrw (%rsi), %cx # sched: [8:4.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_bsr16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: bsrw %di, %ax # sched: [3:0.25] -; ZNVER1-NEXT: bsrw (%rsi), %cx # sched: [7:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: # kill: def $ax killed $ax killed $eax -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call { i16, i16 } asm sideeffect "bsr $2, $0 \0A\09 bsr $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i16 %a0, i16* %a1) - %2 = extractvalue { i16, i16 } %1, 0 - %3 = extractvalue { i16, i16 } %1, 1 - %4 = or i16 %2, %3 - ret i16 %4 -} -define i32 @test_bsr32(i32 %a0, i32* %a1) optsize { -; GENERIC-LABEL: test_bsr32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: bsrl %edi, %eax # sched: [3:1.00] -; GENERIC-NEXT: bsrl (%rsi), %ecx # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_bsr32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: bsrl %edi, %eax # sched: [16:8.00] -; ATOM-NEXT: bsrl (%rsi), %ecx # sched: [16:8.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: orl %ecx, %eax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_bsr32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: bsrl %edi, %eax # sched: [10:10.00] -; SLM-NEXT: bsrl (%rsi), %ecx # sched: [13:10.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_bsr32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: bsrl %edi, %eax # sched: [3:1.00] -; SANDY-NEXT: bsrl (%rsi), %ecx # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_bsr32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: bsrl %edi, %eax # sched: [3:1.00] -; HASWELL-NEXT: bsrl (%rsi), %ecx # sched: [8:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bsr32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: bsrl %edi, %eax # sched: [3:1.00] -; BROADWELL-NEXT: bsrl (%rsi), %ecx # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_bsr32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: bsrl %edi, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: bsrl (%rsi), %ecx # sched: [8:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_bsr32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: bsrl %edi, %eax # sched: [3:1.00] -; SKX-NEXT: bsrl (%rsi), %ecx # sched: [8:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_bsr32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: bsrl %edi, %eax # sched: [4:2.00] -; BDVER2-NEXT: bsrl (%rsi), %ecx # sched: [8:2.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_bsr32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: bsrl %edi, %eax # sched: [5:4.00] -; BTVER2-NEXT: bsrl (%rsi), %ecx # sched: [8:4.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_bsr32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: bsrl %edi, %eax # sched: [3:0.25] -; ZNVER1-NEXT: bsrl (%rsi), %ecx # sched: [7:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call { i32, i32 } asm sideeffect "bsr $2, $0 \0A\09 bsr $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i32 %a0, i32* %a1) - %2 = extractvalue { i32, i32 } %1, 0 - %3 = extractvalue { i32, i32 } %1, 1 - %4 = or i32 %2, %3 - ret i32 %4 -} -define i64 @test_bsr64(i64 %a0, i64* %a1) optsize { -; GENERIC-LABEL: test_bsr64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: bsrq %rdi, %rax # sched: [3:1.00] -; GENERIC-NEXT: bsrq (%rsi), %rcx # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_bsr64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: bsrq %rdi, %rax # sched: [16:8.00] -; ATOM-NEXT: bsrq (%rsi), %rcx # sched: [16:8.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: orq %rcx, %rax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_bsr64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: bsrq %rdi, %rax # sched: [10:10.00] -; SLM-NEXT: bsrq (%rsi), %rcx # sched: [13:10.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: orq %rcx, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_bsr64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: bsrq %rdi, %rax # sched: [3:1.00] -; SANDY-NEXT: bsrq (%rsi), %rcx # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: orq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_bsr64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: bsrq %rdi, %rax # sched: [3:1.00] -; HASWELL-NEXT: bsrq (%rsi), %rcx # sched: [8:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bsr64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: bsrq %rdi, %rax # sched: [3:1.00] -; BROADWELL-NEXT: bsrq (%rsi), %rcx # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_bsr64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: bsrq %rdi, %rax # sched: [3:1.00] -; SKYLAKE-NEXT: bsrq (%rsi), %rcx # sched: [8:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_bsr64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: bsrq %rdi, %rax # sched: [3:1.00] -; SKX-NEXT: bsrq (%rsi), %rcx # sched: [8:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: orq %rcx, %rax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_bsr64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: bsrq %rdi, %rax # sched: [4:2.00] -; BDVER2-NEXT: bsrq (%rsi), %rcx # sched: [8:2.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_bsr64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: bsrq %rdi, %rax # sched: [5:4.00] -; BTVER2-NEXT: bsrq (%rsi), %rcx # sched: [8:4.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_bsr64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: bsrq %rdi, %rax # sched: [3:0.25] -; ZNVER1-NEXT: bsrq (%rsi), %rcx # sched: [7:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call { i64, i64 } asm sideeffect "bsr $2, $0 \0A\09 bsr $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i64 %a0, i64* %a1) - %2 = extractvalue { i64, i64 } %1, 0 - %3 = extractvalue { i64, i64 } %1, 1 - %4 = or i64 %2, %3 - ret i64 %4 -} - -define i32 @test_bswap32(i32 %a0) optsize { -; GENERIC-LABEL: test_bswap32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: bswapl %eax # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_bswap32: -; ATOM: # %bb.0: -; ATOM-NEXT: movl %edi, %eax # sched: [1:0.50] -; ATOM-NEXT: bswapl %eax # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_bswap32: -; SLM: # %bb.0: -; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] -; SLM-NEXT: bswapl %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_bswap32: -; SANDY: # %bb.0: -; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: bswapl %eax # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_bswap32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] -; HASWELL-NEXT: bswapl %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bswap32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] -; BROADWELL-NEXT: bswapl %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_bswap32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: bswapl %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_bswap32: -; SKX: # %bb.0: -; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-NEXT: bswapl %eax # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_bswap32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.50] -; BDVER2-NEXT: bswapl %eax # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_bswap32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50] -; BTVER2-NEXT: bswapl %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_bswap32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] -; ZNVER1-NEXT: bswapl %eax # sched: [1:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = tail call i32 asm "bswap $0", "=r,0"(i32 %a0) nounwind - ret i32 %1 -} -define i64 @test_bswap64(i64 %a0) optsize { -; GENERIC-LABEL: test_bswap64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: bswapq %rax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_bswap64: -; ATOM: # %bb.0: -; ATOM-NEXT: movq %rdi, %rax # sched: [1:0.50] -; ATOM-NEXT: bswapq %rax # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_bswap64: -; SLM: # %bb.0: -; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] -; SLM-NEXT: bswapq %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_bswap64: -; SANDY: # %bb.0: -; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] -; SANDY-NEXT: bswapq %rax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_bswap64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HASWELL-NEXT: bswapq %rax # sched: [2:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bswap64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] -; BROADWELL-NEXT: bswapq %rax # sched: [2:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_bswap64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: bswapq %rax # sched: [2:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_bswap64: -; SKX: # %bb.0: -; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] -; SKX-NEXT: bswapq %rax # sched: [2:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_bswap64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BDVER2-NEXT: bswapq %rax # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_bswap64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BTVER2-NEXT: bswapq %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_bswap64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25] -; ZNVER1-NEXT: bswapq %rax # sched: [1:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = tail call i64 asm "bswap $0", "=r,0"(i64 %a0) nounwind - ret i64 %1 -} - -define void @test_bt_btc_btr_bts_16(i16 %a0, i16 %a1, i16 *%a2) optsize { -; GENERIC-LABEL: test_bt_btc_btr_bts_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: btw %si, %di # sched: [1:0.50] -; GENERIC-NEXT: btcw %si, %di # sched: [1:0.50] -; GENERIC-NEXT: btrw %si, %di # sched: [1:0.50] -; GENERIC-NEXT: btsw %si, %di # sched: [1:0.50] -; GENERIC-NEXT: btw %si, (%rdx) # sched: [9:1.00] -; GENERIC-NEXT: btcw %si, (%rdx) # sched: [9:1.00] -; GENERIC-NEXT: btrw %si, (%rdx) # sched: [9:1.00] -; GENERIC-NEXT: btsw %si, (%rdx) # sched: [9:1.00] -; GENERIC-NEXT: btw $7, %di # sched: [1:0.50] -; GENERIC-NEXT: btcw $7, %di # sched: [1:0.50] -; GENERIC-NEXT: btrw $7, %di # sched: [1:0.50] -; GENERIC-NEXT: btsw $7, %di # sched: [1:0.50] -; GENERIC-NEXT: btw $7, (%rdx) # sched: [6:0.50] -; GENERIC-NEXT: btcw $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: btrw $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: btsw $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_bt_btc_btr_bts_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: btw %si, %di # sched: [1:1.00] -; ATOM-NEXT: btcw %si, %di # sched: [1:1.00] -; ATOM-NEXT: btrw %si, %di # sched: [1:1.00] -; ATOM-NEXT: btsw %si, %di # sched: [1:1.00] -; ATOM-NEXT: btw %si, (%rdx) # sched: [9:4.50] -; ATOM-NEXT: btcw %si, (%rdx) # sched: [11:5.50] -; ATOM-NEXT: btrw %si, (%rdx) # sched: [11:5.50] -; ATOM-NEXT: btsw %si, (%rdx) # sched: [11:5.50] -; ATOM-NEXT: btw $7, %di # sched: [1:1.00] -; ATOM-NEXT: btcw $7, %di # sched: [1:1.00] -; ATOM-NEXT: btrw $7, %di # sched: [1:1.00] -; ATOM-NEXT: btsw $7, %di # sched: [1:1.00] -; ATOM-NEXT: btw $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: btcw $7, (%rdx) # sched: [2:1.00] -; ATOM-NEXT: btrw $7, (%rdx) # sched: [2:1.00] -; ATOM-NEXT: btsw $7, (%rdx) # sched: [2:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_bt_btc_btr_bts_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: btw %si, %di # sched: [1:0.50] -; SLM-NEXT: btcw %si, %di # sched: [1:0.50] -; SLM-NEXT: btrw %si, %di # sched: [1:0.50] -; SLM-NEXT: btsw %si, %di # sched: [1:0.50] -; SLM-NEXT: btw %si, (%rdx) # sched: [4:1.00] -; SLM-NEXT: btcw %si, (%rdx) # sched: [4:2.00] -; SLM-NEXT: btrw %si, (%rdx) # sched: [4:2.00] -; SLM-NEXT: btsw %si, (%rdx) # sched: [4:2.00] -; SLM-NEXT: btw $7, %di # sched: [1:0.50] -; SLM-NEXT: btcw $7, %di # sched: [1:0.50] -; SLM-NEXT: btrw $7, %di # sched: [1:0.50] -; SLM-NEXT: btsw $7, %di # sched: [1:0.50] -; SLM-NEXT: btw $7, (%rdx) # sched: [4:1.00] -; SLM-NEXT: btcw $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: btrw $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: btsw $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_bt_btc_btr_bts_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: btw %si, %di # sched: [1:0.50] -; SANDY-NEXT: btcw %si, %di # sched: [1:0.50] -; SANDY-NEXT: btrw %si, %di # sched: [1:0.50] -; SANDY-NEXT: btsw %si, %di # sched: [1:0.50] -; SANDY-NEXT: btw %si, (%rdx) # sched: [9:1.00] -; SANDY-NEXT: btcw %si, (%rdx) # sched: [9:1.00] -; SANDY-NEXT: btrw %si, (%rdx) # sched: [9:1.00] -; SANDY-NEXT: btsw %si, (%rdx) # sched: [9:1.00] -; SANDY-NEXT: btw $7, %di # sched: [1:0.50] -; SANDY-NEXT: btcw $7, %di # sched: [1:0.50] -; SANDY-NEXT: btrw $7, %di # sched: [1:0.50] -; SANDY-NEXT: btsw $7, %di # sched: [1:0.50] -; SANDY-NEXT: btw $7, (%rdx) # sched: [6:0.50] -; SANDY-NEXT: btcw $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: btrw $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: btsw $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_bt_btc_btr_bts_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: btw %si, %di # sched: [1:0.50] -; HASWELL-NEXT: btcw %si, %di # sched: [1:0.50] -; HASWELL-NEXT: btrw %si, %di # sched: [1:0.50] -; HASWELL-NEXT: btsw %si, %di # sched: [1:0.50] -; HASWELL-NEXT: btw %si, (%rdx) # sched: [1:2.50] -; HASWELL-NEXT: btcw %si, (%rdx) # sched: [1:2.75] -; HASWELL-NEXT: btrw %si, (%rdx) # sched: [1:2.75] -; HASWELL-NEXT: btsw %si, (%rdx) # sched: [1:2.75] -; HASWELL-NEXT: btw $7, %di # sched: [1:0.50] -; HASWELL-NEXT: btcw $7, %di # sched: [1:0.50] -; HASWELL-NEXT: btrw $7, %di # sched: [1:0.50] -; HASWELL-NEXT: btsw $7, %di # sched: [1:0.50] -; HASWELL-NEXT: btw $7, (%rdx) # sched: [6:0.50] -; HASWELL-NEXT: btcw $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: btrw $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: btsw $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bt_btc_btr_bts_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: btw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: btcw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: btrw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: btsw %si, %di # sched: [1:0.50] -; BROADWELL-NEXT: btw %si, (%rdx) # sched: [6:0.50] -; BROADWELL-NEXT: btcw %si, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: btrw %si, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: btsw %si, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: btw $7, %di # sched: [1:0.50] -; BROADWELL-NEXT: btcw $7, %di # sched: [1:0.50] -; BROADWELL-NEXT: btrw $7, %di # sched: [1:0.50] -; BROADWELL-NEXT: btsw $7, %di # sched: [1:0.50] -; BROADWELL-NEXT: btw $7, (%rdx) # sched: [6:0.50] -; BROADWELL-NEXT: btcw $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: btrw $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: btsw $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_bt_btc_btr_bts_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: btw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: btcw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: btrw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: btsw %si, %di # sched: [1:0.50] -; SKYLAKE-NEXT: btw %si, (%rdx) # sched: [6:0.50] -; SKYLAKE-NEXT: btcw %si, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: btrw %si, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: btsw %si, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: btw $7, %di # sched: [1:0.50] -; SKYLAKE-NEXT: btcw $7, %di # sched: [1:0.50] -; SKYLAKE-NEXT: btrw $7, %di # sched: [1:0.50] -; SKYLAKE-NEXT: btsw $7, %di # sched: [1:0.50] -; SKYLAKE-NEXT: btw $7, (%rdx) # sched: [6:0.50] -; SKYLAKE-NEXT: btcw $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: btrw $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: btsw $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_bt_btc_btr_bts_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: btw %si, %di # sched: [1:0.50] -; SKX-NEXT: btcw %si, %di # sched: [1:0.50] -; SKX-NEXT: btrw %si, %di # sched: [1:0.50] -; SKX-NEXT: btsw %si, %di # sched: [1:0.50] -; SKX-NEXT: btw %si, (%rdx) # sched: [6:0.50] -; SKX-NEXT: btcw %si, (%rdx) # sched: [6:1.00] -; SKX-NEXT: btrw %si, (%rdx) # sched: [6:1.00] -; SKX-NEXT: btsw %si, (%rdx) # sched: [6:1.00] -; SKX-NEXT: btw $7, %di # sched: [1:0.50] -; SKX-NEXT: btcw $7, %di # sched: [1:0.50] -; SKX-NEXT: btrw $7, %di # sched: [1:0.50] -; SKX-NEXT: btsw $7, %di # sched: [1:0.50] -; SKX-NEXT: btw $7, (%rdx) # sched: [6:0.50] -; SKX-NEXT: btcw $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: btrw $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: btsw $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_bt_btc_btr_bts_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: btw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: btcw %si, %di # sched: [2:0.50] -; BDVER2-NEXT: btrw %si, %di # sched: [2:0.50] -; BDVER2-NEXT: btsw %si, %di # sched: [2:0.50] -; BDVER2-NEXT: btw %si, (%rdx) # sched: [5:0.50] -; BDVER2-NEXT: btcw %si, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: btrw %si, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: btsw %si, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: btw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: btcw $7, %di # sched: [2:0.50] -; BDVER2-NEXT: btrw $7, %di # sched: [2:0.50] -; BDVER2-NEXT: btsw $7, %di # sched: [2:0.50] -; BDVER2-NEXT: btw $7, (%rdx) # sched: [5:0.50] -; BDVER2-NEXT: btcw $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: btrw $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: btsw $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_bt_btc_btr_bts_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: btw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: btcw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: btrw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: btsw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: btw %si, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: btcw %si, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: btrw %si, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: btsw %si, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: btw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: btcw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: btrw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: btsw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: btw $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: btcw $7, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: btrw $7, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: btsw $7, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_bt_btc_btr_bts_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: btw %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: btcw %si, %di # sched: [2:0.25] -; ZNVER1-NEXT: btrw %si, %di # sched: [2:0.25] -; ZNVER1-NEXT: btsw %si, %di # sched: [2:0.25] -; ZNVER1-NEXT: btw %si, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: btcw %si, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: btrw %si, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: btsw %si, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: btw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: btcw $7, %di # sched: [2:0.25] -; ZNVER1-NEXT: btrw $7, %di # sched: [2:0.25] -; ZNVER1-NEXT: btsw $7, %di # sched: [2:0.25] -; ZNVER1-NEXT: btw $7, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: btcw $7, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: btrw $7, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: btsw $7, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "btw $1, $0 \0A\09 btcw $1, $0 \0A\09 btrw $1, $0 \0A\09 btsw $1, $0 \0A\09 btw $1, $2 \0A\09 btcw $1, $2 \0A\09 btrw $1, $2 \0A\09 btsw $1, $2 \0A\09 btw $3, $0 \0A\09 btcw $3, $0 \0A\09 btrw $3, $0 \0A\09 btsw $3, $0 \0A\09 btw $3, $2 \0A\09 btcw $3, $2 \0A\09 btrw $3, $2 \0A\09 btsw $3, $2", "r,r,*m,i"(i16 %a0, i16 %a1, i16 *%a2, i8 7) - ret void -} -define void @test_bt_btc_btr_bts_32(i32 %a0, i32 %a1, i32 *%a2) optsize { -; GENERIC-LABEL: test_bt_btc_btr_bts_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: btl %esi, %edi # sched: [1:0.50] -; GENERIC-NEXT: btcl %esi, %edi # sched: [1:0.50] -; GENERIC-NEXT: btrl %esi, %edi # sched: [1:0.50] -; GENERIC-NEXT: btsl %esi, %edi # sched: [1:0.50] -; GENERIC-NEXT: btl %esi, (%rdx) # sched: [9:1.00] -; GENERIC-NEXT: btcl %esi, (%rdx) # sched: [9:1.00] -; GENERIC-NEXT: btrl %esi, (%rdx) # sched: [9:1.00] -; GENERIC-NEXT: btsl %esi, (%rdx) # sched: [9:1.00] -; GENERIC-NEXT: btl $7, %edi # sched: [1:0.50] -; GENERIC-NEXT: btcl $7, %edi # sched: [1:0.50] -; GENERIC-NEXT: btrl $7, %edi # sched: [1:0.50] -; GENERIC-NEXT: btsl $7, %edi # sched: [1:0.50] -; GENERIC-NEXT: btl $7, (%rdx) # sched: [6:0.50] -; GENERIC-NEXT: btcl $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: btrl $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: btsl $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_bt_btc_btr_bts_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: btl %esi, %edi # sched: [1:1.00] -; ATOM-NEXT: btcl %esi, %edi # sched: [1:1.00] -; ATOM-NEXT: btrl %esi, %edi # sched: [1:1.00] -; ATOM-NEXT: btsl %esi, %edi # sched: [1:1.00] -; ATOM-NEXT: btl %esi, (%rdx) # sched: [9:4.50] -; ATOM-NEXT: btcl %esi, (%rdx) # sched: [11:5.50] -; ATOM-NEXT: btrl %esi, (%rdx) # sched: [11:5.50] -; ATOM-NEXT: btsl %esi, (%rdx) # sched: [11:5.50] -; ATOM-NEXT: btl $7, %edi # sched: [1:1.00] -; ATOM-NEXT: btcl $7, %edi # sched: [1:1.00] -; ATOM-NEXT: btrl $7, %edi # sched: [1:1.00] -; ATOM-NEXT: btsl $7, %edi # sched: [1:1.00] -; ATOM-NEXT: btl $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: btcl $7, (%rdx) # sched: [2:1.00] -; ATOM-NEXT: btrl $7, (%rdx) # sched: [2:1.00] -; ATOM-NEXT: btsl $7, (%rdx) # sched: [2:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_bt_btc_btr_bts_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: btl %esi, %edi # sched: [1:0.50] -; SLM-NEXT: btcl %esi, %edi # sched: [1:0.50] -; SLM-NEXT: btrl %esi, %edi # sched: [1:0.50] -; SLM-NEXT: btsl %esi, %edi # sched: [1:0.50] -; SLM-NEXT: btl %esi, (%rdx) # sched: [4:1.00] -; SLM-NEXT: btcl %esi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: btrl %esi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: btsl %esi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: btl $7, %edi # sched: [1:0.50] -; SLM-NEXT: btcl $7, %edi # sched: [1:0.50] -; SLM-NEXT: btrl $7, %edi # sched: [1:0.50] -; SLM-NEXT: btsl $7, %edi # sched: [1:0.50] -; SLM-NEXT: btl $7, (%rdx) # sched: [4:1.00] -; SLM-NEXT: btcl $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: btrl $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: btsl $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_bt_btc_btr_bts_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: btl %esi, %edi # sched: [1:0.50] -; SANDY-NEXT: btcl %esi, %edi # sched: [1:0.50] -; SANDY-NEXT: btrl %esi, %edi # sched: [1:0.50] -; SANDY-NEXT: btsl %esi, %edi # sched: [1:0.50] -; SANDY-NEXT: btl %esi, (%rdx) # sched: [9:1.00] -; SANDY-NEXT: btcl %esi, (%rdx) # sched: [9:1.00] -; SANDY-NEXT: btrl %esi, (%rdx) # sched: [9:1.00] -; SANDY-NEXT: btsl %esi, (%rdx) # sched: [9:1.00] -; SANDY-NEXT: btl $7, %edi # sched: [1:0.50] -; SANDY-NEXT: btcl $7, %edi # sched: [1:0.50] -; SANDY-NEXT: btrl $7, %edi # sched: [1:0.50] -; SANDY-NEXT: btsl $7, %edi # sched: [1:0.50] -; SANDY-NEXT: btl $7, (%rdx) # sched: [6:0.50] -; SANDY-NEXT: btcl $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: btrl $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: btsl $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_bt_btc_btr_bts_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: btl %esi, %edi # sched: [1:0.50] -; HASWELL-NEXT: btcl %esi, %edi # sched: [1:0.50] -; HASWELL-NEXT: btrl %esi, %edi # sched: [1:0.50] -; HASWELL-NEXT: btsl %esi, %edi # sched: [1:0.50] -; HASWELL-NEXT: btl %esi, (%rdx) # sched: [1:2.50] -; HASWELL-NEXT: btcl %esi, (%rdx) # sched: [1:2.75] -; HASWELL-NEXT: btrl %esi, (%rdx) # sched: [1:2.75] -; HASWELL-NEXT: btsl %esi, (%rdx) # sched: [1:2.75] -; HASWELL-NEXT: btl $7, %edi # sched: [1:0.50] -; HASWELL-NEXT: btcl $7, %edi # sched: [1:0.50] -; HASWELL-NEXT: btrl $7, %edi # sched: [1:0.50] -; HASWELL-NEXT: btsl $7, %edi # sched: [1:0.50] -; HASWELL-NEXT: btl $7, (%rdx) # sched: [6:0.50] -; HASWELL-NEXT: btcl $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: btrl $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: btsl $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bt_btc_btr_bts_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: btl %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: btcl %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: btrl %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: btsl %esi, %edi # sched: [1:0.50] -; BROADWELL-NEXT: btl %esi, (%rdx) # sched: [6:0.50] -; BROADWELL-NEXT: btcl %esi, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: btrl %esi, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: btsl %esi, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: btl $7, %edi # sched: [1:0.50] -; BROADWELL-NEXT: btcl $7, %edi # sched: [1:0.50] -; BROADWELL-NEXT: btrl $7, %edi # sched: [1:0.50] -; BROADWELL-NEXT: btsl $7, %edi # sched: [1:0.50] -; BROADWELL-NEXT: btl $7, (%rdx) # sched: [6:0.50] -; BROADWELL-NEXT: btcl $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: btrl $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: btsl $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_bt_btc_btr_bts_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: btl %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: btcl %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: btrl %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: btsl %esi, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: btl %esi, (%rdx) # sched: [6:0.50] -; SKYLAKE-NEXT: btcl %esi, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: btrl %esi, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: btsl %esi, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: btl $7, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: btcl $7, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: btrl $7, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: btsl $7, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: btl $7, (%rdx) # sched: [6:0.50] -; SKYLAKE-NEXT: btcl $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: btrl $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: btsl $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_bt_btc_btr_bts_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: btl %esi, %edi # sched: [1:0.50] -; SKX-NEXT: btcl %esi, %edi # sched: [1:0.50] -; SKX-NEXT: btrl %esi, %edi # sched: [1:0.50] -; SKX-NEXT: btsl %esi, %edi # sched: [1:0.50] -; SKX-NEXT: btl %esi, (%rdx) # sched: [6:0.50] -; SKX-NEXT: btcl %esi, (%rdx) # sched: [6:1.00] -; SKX-NEXT: btrl %esi, (%rdx) # sched: [6:1.00] -; SKX-NEXT: btsl %esi, (%rdx) # sched: [6:1.00] -; SKX-NEXT: btl $7, %edi # sched: [1:0.50] -; SKX-NEXT: btcl $7, %edi # sched: [1:0.50] -; SKX-NEXT: btrl $7, %edi # sched: [1:0.50] -; SKX-NEXT: btsl $7, %edi # sched: [1:0.50] -; SKX-NEXT: btl $7, (%rdx) # sched: [6:0.50] -; SKX-NEXT: btcl $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: btrl $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: btsl $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_bt_btc_btr_bts_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: btl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: btcl %esi, %edi # sched: [2:0.50] -; BDVER2-NEXT: btrl %esi, %edi # sched: [2:0.50] -; BDVER2-NEXT: btsl %esi, %edi # sched: [2:0.50] -; BDVER2-NEXT: btl %esi, (%rdx) # sched: [5:0.50] -; BDVER2-NEXT: btcl %esi, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: btrl %esi, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: btsl %esi, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: btl $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: btcl $7, %edi # sched: [2:0.50] -; BDVER2-NEXT: btrl $7, %edi # sched: [2:0.50] -; BDVER2-NEXT: btsl $7, %edi # sched: [2:0.50] -; BDVER2-NEXT: btl $7, (%rdx) # sched: [5:0.50] -; BDVER2-NEXT: btcl $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: btrl $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: btsl $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_bt_btc_btr_bts_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: btl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: btcl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: btrl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: btsl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: btl %esi, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: btcl %esi, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: btrl %esi, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: btsl %esi, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: btl $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: btcl $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: btrl $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: btsl $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: btl $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: btcl $7, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: btrl $7, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: btsl $7, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_bt_btc_btr_bts_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: btl %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: btcl %esi, %edi # sched: [2:0.25] -; ZNVER1-NEXT: btrl %esi, %edi # sched: [2:0.25] -; ZNVER1-NEXT: btsl %esi, %edi # sched: [2:0.25] -; ZNVER1-NEXT: btl %esi, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: btcl %esi, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: btrl %esi, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: btsl %esi, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: btl $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: btcl $7, %edi # sched: [2:0.25] -; ZNVER1-NEXT: btrl $7, %edi # sched: [2:0.25] -; ZNVER1-NEXT: btsl $7, %edi # sched: [2:0.25] -; ZNVER1-NEXT: btl $7, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: btcl $7, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: btrl $7, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: btsl $7, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "btl $1, $0 \0A\09 btcl $1, $0 \0A\09 btrl $1, $0 \0A\09 btsl $1, $0 \0A\09 btl $1, $2 \0A\09 btcl $1, $2 \0A\09 btrl $1, $2 \0A\09 btsl $1, $2 \0A\09 btl $3, $0 \0A\09 btcl $3, $0 \0A\09 btrl $3, $0 \0A\09 btsl $3, $0 \0A\09 btl $3, $2 \0A\09 btcl $3, $2 \0A\09 btrl $3, $2 \0A\09 btsl $3, $2", "r,r,*m,i"(i32 %a0, i32 %a1, i32 *%a2, i8 7) - ret void -} -define void @test_bt_btc_btr_bts_64(i64 %a0, i64 %a1, i64 *%a2) optsize { -; GENERIC-LABEL: test_bt_btc_btr_bts_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: btq %rsi, %rdi # sched: [1:0.50] -; GENERIC-NEXT: btcq %rsi, %rdi # sched: [1:0.50] -; GENERIC-NEXT: btrq %rsi, %rdi # sched: [1:0.50] -; GENERIC-NEXT: btsq %rsi, %rdi # sched: [1:0.50] -; GENERIC-NEXT: btq %rsi, (%rdx) # sched: [9:1.00] -; GENERIC-NEXT: btcq %rsi, (%rdx) # sched: [9:1.00] -; GENERIC-NEXT: btrq %rsi, (%rdx) # sched: [9:1.00] -; GENERIC-NEXT: btsq %rsi, (%rdx) # sched: [9:1.00] -; GENERIC-NEXT: btq $7, %rdi # sched: [1:0.50] -; GENERIC-NEXT: btcq $7, %rdi # sched: [1:0.50] -; GENERIC-NEXT: btrq $7, %rdi # sched: [1:0.50] -; GENERIC-NEXT: btsq $7, %rdi # sched: [1:0.50] -; GENERIC-NEXT: btq $7, (%rdx) # sched: [6:0.50] -; GENERIC-NEXT: btcq $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: btrq $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: btsq $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_bt_btc_btr_bts_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: btq %rsi, %rdi # sched: [1:1.00] -; ATOM-NEXT: btcq %rsi, %rdi # sched: [1:1.00] -; ATOM-NEXT: btrq %rsi, %rdi # sched: [1:1.00] -; ATOM-NEXT: btsq %rsi, %rdi # sched: [1:1.00] -; ATOM-NEXT: btq %rsi, (%rdx) # sched: [9:4.50] -; ATOM-NEXT: btcq %rsi, (%rdx) # sched: [11:5.50] -; ATOM-NEXT: btrq %rsi, (%rdx) # sched: [11:5.50] -; ATOM-NEXT: btsq %rsi, (%rdx) # sched: [11:5.50] -; ATOM-NEXT: btq $7, %rdi # sched: [1:1.00] -; ATOM-NEXT: btcq $7, %rdi # sched: [1:1.00] -; ATOM-NEXT: btrq $7, %rdi # sched: [1:1.00] -; ATOM-NEXT: btsq $7, %rdi # sched: [1:1.00] -; ATOM-NEXT: btq $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: btcq $7, (%rdx) # sched: [2:1.00] -; ATOM-NEXT: btrq $7, (%rdx) # sched: [2:1.00] -; ATOM-NEXT: btsq $7, (%rdx) # sched: [2:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_bt_btc_btr_bts_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: btq %rsi, %rdi # sched: [1:0.50] -; SLM-NEXT: btcq %rsi, %rdi # sched: [1:0.50] -; SLM-NEXT: btrq %rsi, %rdi # sched: [1:0.50] -; SLM-NEXT: btsq %rsi, %rdi # sched: [1:0.50] -; SLM-NEXT: btq %rsi, (%rdx) # sched: [4:1.00] -; SLM-NEXT: btcq %rsi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: btrq %rsi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: btsq %rsi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: btq $7, %rdi # sched: [1:0.50] -; SLM-NEXT: btcq $7, %rdi # sched: [1:0.50] -; SLM-NEXT: btrq $7, %rdi # sched: [1:0.50] -; SLM-NEXT: btsq $7, %rdi # sched: [1:0.50] -; SLM-NEXT: btq $7, (%rdx) # sched: [4:1.00] -; SLM-NEXT: btcq $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: btrq $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: btsq $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_bt_btc_btr_bts_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: btq %rsi, %rdi # sched: [1:0.50] -; SANDY-NEXT: btcq %rsi, %rdi # sched: [1:0.50] -; SANDY-NEXT: btrq %rsi, %rdi # sched: [1:0.50] -; SANDY-NEXT: btsq %rsi, %rdi # sched: [1:0.50] -; SANDY-NEXT: btq %rsi, (%rdx) # sched: [9:1.00] -; SANDY-NEXT: btcq %rsi, (%rdx) # sched: [9:1.00] -; SANDY-NEXT: btrq %rsi, (%rdx) # sched: [9:1.00] -; SANDY-NEXT: btsq %rsi, (%rdx) # sched: [9:1.00] -; SANDY-NEXT: btq $7, %rdi # sched: [1:0.50] -; SANDY-NEXT: btcq $7, %rdi # sched: [1:0.50] -; SANDY-NEXT: btrq $7, %rdi # sched: [1:0.50] -; SANDY-NEXT: btsq $7, %rdi # sched: [1:0.50] -; SANDY-NEXT: btq $7, (%rdx) # sched: [6:0.50] -; SANDY-NEXT: btcq $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: btrq $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: btsq $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_bt_btc_btr_bts_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: btq %rsi, %rdi # sched: [1:0.50] -; HASWELL-NEXT: btcq %rsi, %rdi # sched: [1:0.50] -; HASWELL-NEXT: btrq %rsi, %rdi # sched: [1:0.50] -; HASWELL-NEXT: btsq %rsi, %rdi # sched: [1:0.50] -; HASWELL-NEXT: btq %rsi, (%rdx) # sched: [1:2.50] -; HASWELL-NEXT: btcq %rsi, (%rdx) # sched: [1:2.75] -; HASWELL-NEXT: btrq %rsi, (%rdx) # sched: [1:2.75] -; HASWELL-NEXT: btsq %rsi, (%rdx) # sched: [1:2.75] -; HASWELL-NEXT: btq $7, %rdi # sched: [1:0.50] -; HASWELL-NEXT: btcq $7, %rdi # sched: [1:0.50] -; HASWELL-NEXT: btrq $7, %rdi # sched: [1:0.50] -; HASWELL-NEXT: btsq $7, %rdi # sched: [1:0.50] -; HASWELL-NEXT: btq $7, (%rdx) # sched: [6:0.50] -; HASWELL-NEXT: btcq $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: btrq $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: btsq $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_bt_btc_btr_bts_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: btq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: btcq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: btrq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: btsq %rsi, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: btq %rsi, (%rdx) # sched: [6:0.50] -; BROADWELL-NEXT: btcq %rsi, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: btrq %rsi, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: btsq %rsi, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: btq $7, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: btcq $7, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: btrq $7, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: btsq $7, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: btq $7, (%rdx) # sched: [6:0.50] -; BROADWELL-NEXT: btcq $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: btrq $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: btsq $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_bt_btc_btr_bts_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: btq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: btcq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: btrq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: btsq %rsi, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: btq %rsi, (%rdx) # sched: [6:0.50] -; SKYLAKE-NEXT: btcq %rsi, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: btrq %rsi, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: btsq %rsi, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: btq $7, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: btcq $7, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: btrq $7, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: btsq $7, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: btq $7, (%rdx) # sched: [6:0.50] -; SKYLAKE-NEXT: btcq $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: btrq $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: btsq $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_bt_btc_btr_bts_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: btq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: btcq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: btrq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: btsq %rsi, %rdi # sched: [1:0.50] -; SKX-NEXT: btq %rsi, (%rdx) # sched: [6:0.50] -; SKX-NEXT: btcq %rsi, (%rdx) # sched: [6:1.00] -; SKX-NEXT: btrq %rsi, (%rdx) # sched: [6:1.00] -; SKX-NEXT: btsq %rsi, (%rdx) # sched: [6:1.00] -; SKX-NEXT: btq $7, %rdi # sched: [1:0.50] -; SKX-NEXT: btcq $7, %rdi # sched: [1:0.50] -; SKX-NEXT: btrq $7, %rdi # sched: [1:0.50] -; SKX-NEXT: btsq $7, %rdi # sched: [1:0.50] -; SKX-NEXT: btq $7, (%rdx) # sched: [6:0.50] -; SKX-NEXT: btcq $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: btrq $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: btsq $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_bt_btc_btr_bts_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: btq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: btcq %rsi, %rdi # sched: [2:0.50] -; BDVER2-NEXT: btrq %rsi, %rdi # sched: [2:0.50] -; BDVER2-NEXT: btsq %rsi, %rdi # sched: [2:0.50] -; BDVER2-NEXT: btq %rsi, (%rdx) # sched: [5:0.50] -; BDVER2-NEXT: btcq %rsi, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: btrq %rsi, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: btsq %rsi, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: btq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: btcq $7, %rdi # sched: [2:0.50] -; BDVER2-NEXT: btrq $7, %rdi # sched: [2:0.50] -; BDVER2-NEXT: btsq $7, %rdi # sched: [2:0.50] -; BDVER2-NEXT: btq $7, (%rdx) # sched: [5:0.50] -; BDVER2-NEXT: btcq $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: btrq $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: btsq $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_bt_btc_btr_bts_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: btq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: btcq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: btrq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: btsq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: btq %rsi, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: btcq %rsi, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: btrq %rsi, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: btsq %rsi, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: btq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: btcq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: btrq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: btsq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: btq $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: btcq $7, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: btrq $7, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: btsq $7, (%rdx) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_bt_btc_btr_bts_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: btq %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: btcq %rsi, %rdi # sched: [2:0.25] -; ZNVER1-NEXT: btrq %rsi, %rdi # sched: [2:0.25] -; ZNVER1-NEXT: btsq %rsi, %rdi # sched: [2:0.25] -; ZNVER1-NEXT: btq %rsi, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: btcq %rsi, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: btrq %rsi, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: btsq %rsi, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: btq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: btcq $7, %rdi # sched: [2:0.25] -; ZNVER1-NEXT: btrq $7, %rdi # sched: [2:0.25] -; ZNVER1-NEXT: btsq $7, %rdi # sched: [2:0.25] -; ZNVER1-NEXT: btq $7, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: btcq $7, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: btrq $7, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: btsq $7, (%rdx) # sched: [6:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "btq $1, $0 \0A\09 btcq $1, $0 \0A\09 btrq $1, $0 \0A\09 btsq $1, $0 \0A\09 btq $1, $2 \0A\09 btcq $1, $2 \0A\09 btrq $1, $2 \0A\09 btsq $1, $2 \0A\09 btq $3, $0 \0A\09 btcq $3, $0 \0A\09 btrq $3, $0 \0A\09 btsq $3, $0 \0A\09 btq $3, $2 \0A\09 btcq $3, $2 \0A\09 btrq $3, $2 \0A\09 btsq $3, $2", "r,r,*m,i"(i64 %a0, i64 %a1, i64 *%a2, i8 7) - ret void -} - -; TODO - test_call - -define void @test_cbw_cdq_cdqe_cqo_cwd_cwde() optsize { -; GENERIC-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: cbtw # sched: [1:0.33] -; GENERIC-NEXT: cltd # sched: [1:0.50] -; GENERIC-NEXT: cltq # sched: [1:0.33] -; GENERIC-NEXT: cqto # sched: [1:0.50] -; GENERIC-NEXT: cwtd # sched: [2:1.00] -; GENERIC-NEXT: cwtl # sched: [1:0.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: cbtw # sched: [4:2.00] -; ATOM-NEXT: cltd # sched: [4:2.00] -; ATOM-NEXT: cltq # sched: [4:2.00] -; ATOM-NEXT: cqto # sched: [4:2.00] -; ATOM-NEXT: cwtd # sched: [4:2.00] -; ATOM-NEXT: cwtl # sched: [4:2.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: cbtw # sched: [1:0.50] -; SLM-NEXT: cltd # sched: [1:0.50] -; SLM-NEXT: cltq # sched: [1:0.50] -; SLM-NEXT: cqto # sched: [1:0.50] -; SLM-NEXT: cwtd # sched: [1:0.50] -; SLM-NEXT: cwtl # sched: [1:0.50] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: cbtw # sched: [1:0.33] -; SANDY-NEXT: cltd # sched: [1:0.50] -; SANDY-NEXT: cltq # sched: [1:0.33] -; SANDY-NEXT: cqto # sched: [1:0.50] -; SANDY-NEXT: cwtd # sched: [2:1.00] -; SANDY-NEXT: cwtl # sched: [1:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: cbtw # sched: [1:0.25] -; HASWELL-NEXT: cltd # sched: [1:0.50] -; HASWELL-NEXT: cltq # sched: [1:0.25] -; HASWELL-NEXT: cqto # sched: [1:0.50] -; HASWELL-NEXT: cwtd # sched: [2:0.50] -; HASWELL-NEXT: cwtl # sched: [1:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: cbtw # sched: [1:0.25] -; BROADWELL-NEXT: cltd # sched: [1:0.50] -; BROADWELL-NEXT: cltq # sched: [1:0.25] -; BROADWELL-NEXT: cqto # sched: [1:0.50] -; BROADWELL-NEXT: cwtd # sched: [2:0.50] -; BROADWELL-NEXT: cwtl # sched: [1:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: cbtw # sched: [1:0.25] -; SKYLAKE-NEXT: cltd # sched: [1:0.50] -; SKYLAKE-NEXT: cltq # sched: [1:0.25] -; SKYLAKE-NEXT: cqto # sched: [1:0.50] -; SKYLAKE-NEXT: cwtd # sched: [2:0.50] -; SKYLAKE-NEXT: cwtl # sched: [1:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: cbtw # sched: [1:0.25] -; SKX-NEXT: cltd # sched: [1:0.50] -; SKX-NEXT: cltq # sched: [1:0.25] -; SKX-NEXT: cqto # sched: [1:0.50] -; SKX-NEXT: cwtd # sched: [2:0.50] -; SKX-NEXT: cwtl # sched: [1:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: cbtw # sched: [1:0.50] -; BDVER2-NEXT: cltd # sched: [1:0.50] -; BDVER2-NEXT: cltq # sched: [1:0.50] -; BDVER2-NEXT: cqto # sched: [1:0.50] -; BDVER2-NEXT: cwtd # sched: [1:0.50] -; BDVER2-NEXT: cwtl # sched: [1:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: cbtw # sched: [1:0.50] -; BTVER2-NEXT: cltd # sched: [1:0.50] -; BTVER2-NEXT: cltq # sched: [1:0.50] -; BTVER2-NEXT: cqto # sched: [1:0.50] -; BTVER2-NEXT: cwtd # sched: [1:0.50] -; BTVER2-NEXT: cwtl # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: cbtw # sched: [1:0.25] -; ZNVER1-NEXT: cltd # sched: [1:0.25] -; ZNVER1-NEXT: cltq # sched: [1:0.25] -; ZNVER1-NEXT: cqto # sched: [1:0.25] -; ZNVER1-NEXT: cwtd # sched: [1:0.25] -; ZNVER1-NEXT: cwtl # sched: [1:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "cbw \0A\09 cdq \0A\09 cdqe \0A\09 cqo \0A\09 cwd \0A\09 cwde", ""() nounwind - ret void -} - -define void @test_clc_cld_cmc() optsize { -; GENERIC-LABEL: test_clc_cld_cmc: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: clc # sched: [1:0.25] -; GENERIC-NEXT: cld # sched: [1:0.33] -; GENERIC-NEXT: cmc # sched: [1:0.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_clc_cld_cmc: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: clc # sched: [1:0.50] -; ATOM-NEXT: cld # sched: [3:1.50] -; ATOM-NEXT: cmc # sched: [1:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_clc_cld_cmc: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: clc # sched: [1:0.50] -; SLM-NEXT: cld # sched: [1:0.50] -; SLM-NEXT: cmc # sched: [1:0.50] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_clc_cld_cmc: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: clc # sched: [1:0.25] -; SANDY-NEXT: cld # sched: [1:0.33] -; SANDY-NEXT: cmc # sched: [1:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_clc_cld_cmc: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: clc # sched: [1:0.25] -; HASWELL-NEXT: cld # sched: [3:1.00] -; HASWELL-NEXT: cmc # sched: [1:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_clc_cld_cmc: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: clc # sched: [1:0.25] -; BROADWELL-NEXT: cld # sched: [3:1.00] -; BROADWELL-NEXT: cmc # sched: [1:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_clc_cld_cmc: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: clc # sched: [1:0.17] -; SKYLAKE-NEXT: cld # sched: [3:1.00] -; SKYLAKE-NEXT: cmc # sched: [1:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_clc_cld_cmc: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: clc # sched: [1:0.17] -; SKX-NEXT: cld # sched: [3:1.00] -; SKX-NEXT: cmc # sched: [1:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_clc_cld_cmc: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: clc # sched: [1:0.50] -; BDVER2-NEXT: cld # sched: [1:0.50] -; BDVER2-NEXT: cmc # sched: [1:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_clc_cld_cmc: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: clc # sched: [1:0.50] -; BTVER2-NEXT: cld # sched: [1:0.50] -; BTVER2-NEXT: cmc # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_clc_cld_cmc: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: clc # sched: [1:0.25] -; ZNVER1-NEXT: cld # sched: [1:0.25] -; ZNVER1-NEXT: cmc # sched: [1:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "clc \0A\09 cld \0A\09 cmc", ""() nounwind - ret void -} - -define void @test_cmp_8(i8 %a0, i8* %a1) optsize { -; GENERIC-LABEL: test_cmp_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: cmpb $7, %al # sched: [1:0.33] -; GENERIC-NEXT: cmpb $7, %dil # sched: [1:0.33] -; GENERIC-NEXT: cmpb $7, (%rsi) # sched: [6:0.50] -; GENERIC-NEXT: cmpb %dil, %dil # sched: [1:0.33] -; GENERIC-NEXT: cmpb %dil, (%rsi) # sched: [6:0.50] -; GENERIC-NEXT: cmpb (%rsi), %dil # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmp_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: cmpb $7, %al # sched: [1:0.50] -; ATOM-NEXT: cmpb $7, %dil # sched: [1:0.50] -; ATOM-NEXT: cmpb $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: cmpb %dil, %dil # sched: [1:0.50] -; ATOM-NEXT: cmpb %dil, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: cmpb (%rsi), %dil # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmp_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: cmpb $7, %al # sched: [1:0.50] -; SLM-NEXT: cmpb $7, %dil # sched: [1:0.50] -; SLM-NEXT: cmpb $7, (%rsi) # sched: [4:1.00] -; SLM-NEXT: cmpb %dil, %dil # sched: [1:0.50] -; SLM-NEXT: cmpb %dil, (%rsi) # sched: [4:1.00] -; SLM-NEXT: cmpb (%rsi), %dil # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cmp_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: cmpb $7, %al # sched: [1:0.33] -; SANDY-NEXT: cmpb $7, %dil # sched: [1:0.33] -; SANDY-NEXT: cmpb $7, (%rsi) # sched: [6:0.50] -; SANDY-NEXT: cmpb %dil, %dil # sched: [1:0.33] -; SANDY-NEXT: cmpb %dil, (%rsi) # sched: [6:0.50] -; SANDY-NEXT: cmpb (%rsi), %dil # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cmp_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: cmpb $7, %al # sched: [1:0.25] -; HASWELL-NEXT: cmpb $7, %dil # sched: [1:0.25] -; HASWELL-NEXT: cmpb $7, (%rsi) # sched: [6:0.50] -; HASWELL-NEXT: cmpb %dil, %dil # sched: [1:0.25] -; HASWELL-NEXT: cmpb %dil, (%rsi) # sched: [6:0.50] -; HASWELL-NEXT: cmpb (%rsi), %dil # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmp_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: cmpb $7, %al # sched: [1:0.25] -; BROADWELL-NEXT: cmpb $7, %dil # sched: [1:0.25] -; BROADWELL-NEXT: cmpb $7, (%rsi) # sched: [6:0.50] -; BROADWELL-NEXT: cmpb %dil, %dil # sched: [1:0.25] -; BROADWELL-NEXT: cmpb %dil, (%rsi) # sched: [6:0.50] -; BROADWELL-NEXT: cmpb (%rsi), %dil # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmp_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: cmpb $7, %al # sched: [1:0.25] -; SKYLAKE-NEXT: cmpb $7, %dil # sched: [1:0.25] -; SKYLAKE-NEXT: cmpb $7, (%rsi) # sched: [6:0.50] -; SKYLAKE-NEXT: cmpb %dil, %dil # sched: [1:0.25] -; SKYLAKE-NEXT: cmpb %dil, (%rsi) # sched: [6:0.50] -; SKYLAKE-NEXT: cmpb (%rsi), %dil # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmp_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: cmpb $7, %al # sched: [1:0.25] -; SKX-NEXT: cmpb $7, %dil # sched: [1:0.25] -; SKX-NEXT: cmpb $7, (%rsi) # sched: [6:0.50] -; SKX-NEXT: cmpb %dil, %dil # sched: [1:0.25] -; SKX-NEXT: cmpb %dil, (%rsi) # sched: [6:0.50] -; SKX-NEXT: cmpb (%rsi), %dil # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cmp_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpb $7, %al # sched: [1:0.50] -; BDVER2-NEXT: cmpb $7, %dil # sched: [1:0.50] -; BDVER2-NEXT: cmpb $7, (%rsi) # sched: [5:0.50] -; BDVER2-NEXT: cmpb %dil, %dil # sched: [1:0.50] -; BDVER2-NEXT: cmpb %dil, (%rsi) # sched: [5:0.50] -; BDVER2-NEXT: cmpb (%rsi), %dil # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cmp_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: cmpb $7, %al # sched: [1:0.50] -; BTVER2-NEXT: cmpb $7, %dil # sched: [1:0.50] -; BTVER2-NEXT: cmpb $7, (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: cmpb %dil, %dil # sched: [1:0.50] -; BTVER2-NEXT: cmpb %dil, (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: cmpb (%rsi), %dil # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cmp_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: cmpb $7, %al # sched: [1:0.25] -; ZNVER1-NEXT: cmpb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: cmpb $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: cmpb %dil, %dil # sched: [1:0.25] -; ZNVER1-NEXT: cmpb %dil, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: cmpb (%rsi), %dil # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "cmpb $2, %AL \0A\09 cmpb $2, $0 \0A\09 cmpb $2, $1 \0A\09 cmpb $0, $0 \0A\09 cmpb $0, $1 \0A\09 cmpb $1, $0", "r,*m,i"(i8 %a0, i8* %a1, i8 7) nounwind - ret void -} -define void @test_cmp_16(i16 %a0, i16* %a1) optsize { -; GENERIC-LABEL: test_cmp_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: cmpw $511, %ax # imm = 0x1FF -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: cmpw $511, %di # imm = 0x1FF -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: cmpw $511, (%rsi) # imm = 0x1FF -; GENERIC-NEXT: # sched: [6:0.50] -; GENERIC-NEXT: cmpw $7, %di # sched: [1:0.33] -; GENERIC-NEXT: cmpw $7, (%rsi) # sched: [6:0.50] -; GENERIC-NEXT: cmpw %di, %di # sched: [1:0.33] -; GENERIC-NEXT: cmpw %di, (%rsi) # sched: [6:0.50] -; GENERIC-NEXT: cmpw (%rsi), %di # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmp_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: cmpw $511, %ax # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: cmpw $511, %di # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: cmpw $511, (%rsi) # imm = 0x1FF -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: cmpw $7, %di # sched: [1:0.50] -; ATOM-NEXT: cmpw $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: cmpw %di, %di # sched: [1:0.50] -; ATOM-NEXT: cmpw %di, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: cmpw (%rsi), %di # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmp_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: cmpw $511, %ax # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: cmpw $511, %di # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: cmpw $511, (%rsi) # imm = 0x1FF -; SLM-NEXT: # sched: [4:1.00] -; SLM-NEXT: cmpw $7, %di # sched: [1:0.50] -; SLM-NEXT: cmpw $7, (%rsi) # sched: [4:1.00] -; SLM-NEXT: cmpw %di, %di # sched: [1:0.50] -; SLM-NEXT: cmpw %di, (%rsi) # sched: [4:1.00] -; SLM-NEXT: cmpw (%rsi), %di # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cmp_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: cmpw $511, %ax # imm = 0x1FF -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: cmpw $511, %di # imm = 0x1FF -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: cmpw $511, (%rsi) # imm = 0x1FF -; SANDY-NEXT: # sched: [6:0.50] -; SANDY-NEXT: cmpw $7, %di # sched: [1:0.33] -; SANDY-NEXT: cmpw $7, (%rsi) # sched: [6:0.50] -; SANDY-NEXT: cmpw %di, %di # sched: [1:0.33] -; SANDY-NEXT: cmpw %di, (%rsi) # sched: [6:0.50] -; SANDY-NEXT: cmpw (%rsi), %di # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cmp_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: cmpw $511, %ax # imm = 0x1FF -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: cmpw $511, %di # imm = 0x1FF -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: cmpw $511, (%rsi) # imm = 0x1FF -; HASWELL-NEXT: # sched: [6:0.50] -; HASWELL-NEXT: cmpw $7, %di # sched: [1:0.25] -; HASWELL-NEXT: cmpw $7, (%rsi) # sched: [6:0.50] -; HASWELL-NEXT: cmpw %di, %di # sched: [1:0.25] -; HASWELL-NEXT: cmpw %di, (%rsi) # sched: [6:0.50] -; HASWELL-NEXT: cmpw (%rsi), %di # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmp_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: cmpw $511, %ax # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: cmpw $511, %di # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: cmpw $511, (%rsi) # imm = 0x1FF -; BROADWELL-NEXT: # sched: [6:0.50] -; BROADWELL-NEXT: cmpw $7, %di # sched: [1:0.25] -; BROADWELL-NEXT: cmpw $7, (%rsi) # sched: [6:0.50] -; BROADWELL-NEXT: cmpw %di, %di # sched: [1:0.25] -; BROADWELL-NEXT: cmpw %di, (%rsi) # sched: [6:0.50] -; BROADWELL-NEXT: cmpw (%rsi), %di # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmp_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: cmpw $511, %ax # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: cmpw $511, %di # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: cmpw $511, (%rsi) # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [6:0.50] -; SKYLAKE-NEXT: cmpw $7, %di # sched: [1:0.25] -; SKYLAKE-NEXT: cmpw $7, (%rsi) # sched: [6:0.50] -; SKYLAKE-NEXT: cmpw %di, %di # sched: [1:0.25] -; SKYLAKE-NEXT: cmpw %di, (%rsi) # sched: [6:0.50] -; SKYLAKE-NEXT: cmpw (%rsi), %di # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmp_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: cmpw $511, %ax # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: cmpw $511, %di # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: cmpw $511, (%rsi) # imm = 0x1FF -; SKX-NEXT: # sched: [6:0.50] -; SKX-NEXT: cmpw $7, %di # sched: [1:0.25] -; SKX-NEXT: cmpw $7, (%rsi) # sched: [6:0.50] -; SKX-NEXT: cmpw %di, %di # sched: [1:0.25] -; SKX-NEXT: cmpw %di, (%rsi) # sched: [6:0.50] -; SKX-NEXT: cmpw (%rsi), %di # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cmp_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: cmpw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: cmpw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [5:0.50] -; BDVER2-NEXT: cmpw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: cmpw $7, (%rsi) # sched: [5:0.50] -; BDVER2-NEXT: cmpw %di, %di # sched: [1:0.50] -; BDVER2-NEXT: cmpw %di, (%rsi) # sched: [5:0.50] -; BDVER2-NEXT: cmpw (%rsi), %di # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cmp_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: cmpw $511, %ax # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: cmpw $511, %di # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: cmpw $511, (%rsi) # imm = 0x1FF -; BTVER2-NEXT: # sched: [4:1.00] -; BTVER2-NEXT: cmpw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: cmpw $7, (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: cmpw %di, %di # sched: [1:0.50] -; BTVER2-NEXT: cmpw %di, (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: cmpw (%rsi), %di # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cmp_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: cmpw $511, %ax # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: cmpw $511, %di # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: cmpw $511, (%rsi) # imm = 0x1FF -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: cmpw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmpw $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: cmpw %di, %di # sched: [1:0.25] -; ZNVER1-NEXT: cmpw %di, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: cmpw (%rsi), %di # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "cmpw $2, %AX \0A\09 cmpw $2, $0 \0A\09 cmpw $2, $1 \0A\09 cmpw $3, $0 \0A\09 cmpw $3, $1 \0A\09 cmpw $0, $0 \0A\09 cmpw $0, $1 \0A\09 cmpw $1, $0", "r,*m,i,i"(i16 %a0, i16* %a1, i16 511, i8 7) nounwind - ret void -} -define void @test_cmp_32(i32 %a0, i32* %a1) optsize { -; GENERIC-LABEL: test_cmp_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: cmpl $665536, %eax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: cmpl $665536, %edi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [6:0.50] -; GENERIC-NEXT: cmpl $7, %edi # sched: [1:0.33] -; GENERIC-NEXT: cmpl $7, (%rsi) # sched: [6:0.50] -; GENERIC-NEXT: cmpl %edi, %edi # sched: [1:0.33] -; GENERIC-NEXT: cmpl %edi, (%rsi) # sched: [6:0.50] -; GENERIC-NEXT: cmpl (%rsi), %edi # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmp_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: cmpl $665536, %eax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: cmpl $665536, %edi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: cmpl $7, %edi # sched: [1:0.50] -; ATOM-NEXT: cmpl $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: cmpl %edi, %edi # sched: [1:0.50] -; ATOM-NEXT: cmpl %edi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: cmpl (%rsi), %edi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmp_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: cmpl $665536, %eax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: cmpl $665536, %edi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [4:1.00] -; SLM-NEXT: cmpl $7, %edi # sched: [1:0.50] -; SLM-NEXT: cmpl $7, (%rsi) # sched: [4:1.00] -; SLM-NEXT: cmpl %edi, %edi # sched: [1:0.50] -; SLM-NEXT: cmpl %edi, (%rsi) # sched: [4:1.00] -; SLM-NEXT: cmpl (%rsi), %edi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cmp_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: cmpl $665536, %eax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: cmpl $665536, %edi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [6:0.50] -; SANDY-NEXT: cmpl $7, %edi # sched: [1:0.33] -; SANDY-NEXT: cmpl $7, (%rsi) # sched: [6:0.50] -; SANDY-NEXT: cmpl %edi, %edi # sched: [1:0.33] -; SANDY-NEXT: cmpl %edi, (%rsi) # sched: [6:0.50] -; SANDY-NEXT: cmpl (%rsi), %edi # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cmp_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: cmpl $665536, %eax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: cmpl $665536, %edi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [6:0.50] -; HASWELL-NEXT: cmpl $7, %edi # sched: [1:0.25] -; HASWELL-NEXT: cmpl $7, (%rsi) # sched: [6:0.50] -; HASWELL-NEXT: cmpl %edi, %edi # sched: [1:0.25] -; HASWELL-NEXT: cmpl %edi, (%rsi) # sched: [6:0.50] -; HASWELL-NEXT: cmpl (%rsi), %edi # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmp_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: cmpl $665536, %eax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: cmpl $665536, %edi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [6:0.50] -; BROADWELL-NEXT: cmpl $7, %edi # sched: [1:0.25] -; BROADWELL-NEXT: cmpl $7, (%rsi) # sched: [6:0.50] -; BROADWELL-NEXT: cmpl %edi, %edi # sched: [1:0.25] -; BROADWELL-NEXT: cmpl %edi, (%rsi) # sched: [6:0.50] -; BROADWELL-NEXT: cmpl (%rsi), %edi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmp_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: cmpl $665536, %eax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: cmpl $665536, %edi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [6:0.50] -; SKYLAKE-NEXT: cmpl $7, %edi # sched: [1:0.25] -; SKYLAKE-NEXT: cmpl $7, (%rsi) # sched: [6:0.50] -; SKYLAKE-NEXT: cmpl %edi, %edi # sched: [1:0.25] -; SKYLAKE-NEXT: cmpl %edi, (%rsi) # sched: [6:0.50] -; SKYLAKE-NEXT: cmpl (%rsi), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmp_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: cmpl $665536, %eax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: cmpl $665536, %edi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [6:0.50] -; SKX-NEXT: cmpl $7, %edi # sched: [1:0.25] -; SKX-NEXT: cmpl $7, (%rsi) # sched: [6:0.50] -; SKX-NEXT: cmpl %edi, %edi # sched: [1:0.25] -; SKX-NEXT: cmpl %edi, (%rsi) # sched: [6:0.50] -; SKX-NEXT: cmpl (%rsi), %edi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cmp_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: cmpl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [5:0.50] -; BDVER2-NEXT: cmpl $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmpl $7, (%rsi) # sched: [5:0.50] -; BDVER2-NEXT: cmpl %edi, %edi # sched: [1:0.50] -; BDVER2-NEXT: cmpl %edi, (%rsi) # sched: [5:0.50] -; BDVER2-NEXT: cmpl (%rsi), %edi # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cmp_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: cmpl $665536, %eax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: cmpl $665536, %edi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [4:1.00] -; BTVER2-NEXT: cmpl $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmpl $7, (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: cmpl %edi, %edi # sched: [1:0.50] -; BTVER2-NEXT: cmpl %edi, (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: cmpl (%rsi), %edi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cmp_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: cmpl $665536, %eax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: cmpl $665536, %edi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: cmpl $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmpl $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: cmpl %edi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: cmpl %edi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: cmpl (%rsi), %edi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "cmpl $2, %EAX \0A\09 cmpl $2, $0 \0A\09 cmpl $2, $1 \0A\09 cmpl $3, $0 \0A\09 cmpl $3, $1 \0A\09 cmpl $0, $0 \0A\09 cmpl $0, $1 \0A\09 cmpl $1, $0", "r,*m,i,i"(i32 %a0, i32* %a1, i32 665536, i8 7) nounwind - ret void -} -define void @test_cmp_64(i64 %a0, i64* %a1) optsize { -; GENERIC-LABEL: test_cmp_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: cmpq $665536, %rax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: cmpq $665536, %rdi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [6:0.50] -; GENERIC-NEXT: cmpq $7, %rdi # sched: [1:0.33] -; GENERIC-NEXT: cmpq $7, (%rsi) # sched: [6:0.50] -; GENERIC-NEXT: cmpq %rdi, %rdi # sched: [1:0.33] -; GENERIC-NEXT: cmpq %rdi, (%rsi) # sched: [6:0.50] -; GENERIC-NEXT: cmpq (%rsi), %rdi # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmp_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: cmpq $665536, %rax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: cmpq $665536, %rdi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: cmpq $7, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmpq $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: cmpq %rdi, %rdi # sched: [1:0.50] -; ATOM-NEXT: cmpq %rdi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: cmpq (%rsi), %rdi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmp_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: cmpq $665536, %rax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: cmpq $665536, %rdi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [4:1.00] -; SLM-NEXT: cmpq $7, %rdi # sched: [1:0.50] -; SLM-NEXT: cmpq $7, (%rsi) # sched: [4:1.00] -; SLM-NEXT: cmpq %rdi, %rdi # sched: [1:0.50] -; SLM-NEXT: cmpq %rdi, (%rsi) # sched: [4:1.00] -; SLM-NEXT: cmpq (%rsi), %rdi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cmp_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: cmpq $665536, %rax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: cmpq $665536, %rdi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [6:0.50] -; SANDY-NEXT: cmpq $7, %rdi # sched: [1:0.33] -; SANDY-NEXT: cmpq $7, (%rsi) # sched: [6:0.50] -; SANDY-NEXT: cmpq %rdi, %rdi # sched: [1:0.33] -; SANDY-NEXT: cmpq %rdi, (%rsi) # sched: [6:0.50] -; SANDY-NEXT: cmpq (%rsi), %rdi # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cmp_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: cmpq $665536, %rax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: cmpq $665536, %rdi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [6:0.50] -; HASWELL-NEXT: cmpq $7, %rdi # sched: [1:0.25] -; HASWELL-NEXT: cmpq $7, (%rsi) # sched: [6:0.50] -; HASWELL-NEXT: cmpq %rdi, %rdi # sched: [1:0.25] -; HASWELL-NEXT: cmpq %rdi, (%rsi) # sched: [6:0.50] -; HASWELL-NEXT: cmpq (%rsi), %rdi # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmp_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: cmpq $665536, %rax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: cmpq $665536, %rdi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [6:0.50] -; BROADWELL-NEXT: cmpq $7, %rdi # sched: [1:0.25] -; BROADWELL-NEXT: cmpq $7, (%rsi) # sched: [6:0.50] -; BROADWELL-NEXT: cmpq %rdi, %rdi # sched: [1:0.25] -; BROADWELL-NEXT: cmpq %rdi, (%rsi) # sched: [6:0.50] -; BROADWELL-NEXT: cmpq (%rsi), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmp_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: cmpq $665536, %rax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: cmpq $665536, %rdi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [6:0.50] -; SKYLAKE-NEXT: cmpq $7, %rdi # sched: [1:0.25] -; SKYLAKE-NEXT: cmpq $7, (%rsi) # sched: [6:0.50] -; SKYLAKE-NEXT: cmpq %rdi, %rdi # sched: [1:0.25] -; SKYLAKE-NEXT: cmpq %rdi, (%rsi) # sched: [6:0.50] -; SKYLAKE-NEXT: cmpq (%rsi), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmp_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: cmpq $665536, %rax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: cmpq $665536, %rdi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [6:0.50] -; SKX-NEXT: cmpq $7, %rdi # sched: [1:0.25] -; SKX-NEXT: cmpq $7, (%rsi) # sched: [6:0.50] -; SKX-NEXT: cmpq %rdi, %rdi # sched: [1:0.25] -; SKX-NEXT: cmpq %rdi, (%rsi) # sched: [6:0.50] -; SKX-NEXT: cmpq (%rsi), %rdi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cmp_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: cmpq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [5:0.50] -; BDVER2-NEXT: cmpq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmpq $7, (%rsi) # sched: [5:0.50] -; BDVER2-NEXT: cmpq %rdi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: cmpq %rdi, (%rsi) # sched: [5:0.50] -; BDVER2-NEXT: cmpq (%rsi), %rdi # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cmp_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: cmpq $665536, %rax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: cmpq $665536, %rdi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [4:1.00] -; BTVER2-NEXT: cmpq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmpq $7, (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: cmpq %rdi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: cmpq %rdi, (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: cmpq (%rsi), %rdi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cmp_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: cmpq $665536, %rax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: cmpq $665536, %rdi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: cmpq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmpq $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: cmpq %rdi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: cmpq %rdi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: cmpq (%rsi), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "cmpq $2, %RAX \0A\09 cmpq $2, $0 \0A\09 cmpq $2, $1 \0A\09 cmpq $3, $0 \0A\09 cmpq $3, $1 \0A\09 cmpq $0, $0 \0A\09 cmpq $0, $1 \0A\09 cmpq $1, $0", "r,*m,i,i"(i64 %a0, i64* %a1, i32 665536, i8 7) nounwind - ret void -} - -define void @test_cmps() optsize { -; GENERIC-LABEL: test_cmps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [8:1.00] -; GENERIC-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [8:1.00] -; GENERIC-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [8:1.00] -; GENERIC-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmps: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [3:1.50] -; ATOM-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [3:1.50] -; ATOM-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [3:1.50] -; ATOM-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [3:1.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmps: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [100:1.00] -; SLM-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [100:1.00] -; SLM-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [100:1.00] -; SLM-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cmps: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [8:1.00] -; SANDY-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [8:1.00] -; SANDY-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [8:1.00] -; SANDY-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cmps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [4:1.00] -; HASWELL-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [4:1.00] -; HASWELL-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [4:1.00] -; HASWELL-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [4:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [100:0.25] -; BROADWELL-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [100:0.25] -; BROADWELL-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [100:0.25] -; BROADWELL-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [100:0.25] -; SKYLAKE-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [100:0.25] -; SKYLAKE-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [100:0.25] -; SKYLAKE-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmps: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [100:0.25] -; SKX-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [100:0.25] -; SKX-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [100:0.25] -; SKX-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cmps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [100:0.50] -; BDVER2-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [100:0.50] -; BDVER2-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [100:0.50] -; BDVER2-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cmps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [100:0.50] -; BTVER2-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [100:0.50] -; BTVER2-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [100:0.50] -; BTVER2-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cmps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [100:0.25] -; ZNVER1-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [100:0.25] -; ZNVER1-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [100:0.25] -; ZNVER1-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "cmpsb \0A\09 cmpsw \0A\09 cmpsl \0A\09 cmpsq", ""() - ret void -} - -define void @test_cmpxchg_8(i8 %a0, i8 %a1, i8 *%a2) optsize { -; GENERIC-LABEL: test_cmpxchg_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: cmpxchgb %dil, %sil # sched: [5:1.33] -; GENERIC-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:2.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmpxchg_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: cmpxchgb %dil, %sil # sched: [9:4.50] -; ATOM-NEXT: cmpxchgb %dil, (%rdx) # sched: [6:3.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmpxchg_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: cmpxchgb %dil, %sil # sched: [1:0.50] -; SLM-NEXT: cmpxchgb %dil, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cmpxchg_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: cmpxchgb %dil, %sil # sched: [5:1.33] -; SANDY-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:2.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cmpxchg_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: cmpxchgb %dil, %sil # sched: [5:1.25] -; HASWELL-NEXT: cmpxchgb %dil, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmpxchg_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: cmpxchgb %dil, %sil # sched: [5:1.25] -; BROADWELL-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmpxchg_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: cmpxchgb %dil, %sil # sched: [5:1.25] -; SKYLAKE-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmpxchg_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: cmpxchgb %dil, %sil # sched: [5:1.25] -; SKX-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cmpxchg_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpxchgb %dil, %sil # sched: [3:1.00] -; BDVER2-NEXT: cmpxchgb %dil, (%rdx) # sched: [3:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cmpxchg_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: cmpxchgb %dil, %sil # sched: [1:0.50] -; BTVER2-NEXT: cmpxchgb %dil, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cmpxchg_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: cmpxchgb %dil, %sil # sched: [1:0.25] -; ZNVER1-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "cmpxchgb $0, $1 \0a\09 cmpxchgb $0, $2", "r,r,*m"(i8 %a0, i8 %a1, i8 *%a2) nounwind - ret void -} -define void @test_cmpxchg_16(i16 %a0, i16 %a1, i16 *%a2) optsize { -; GENERIC-LABEL: test_cmpxchg_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: cmpxchgw %di, %si # sched: [5:1.33] -; GENERIC-NEXT: cmpxchgw %di, (%rdx) # sched: [8:2.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmpxchg_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: cmpxchgw %di, %si # sched: [15:7.50] -; ATOM-NEXT: cmpxchgw %di, (%rdx) # sched: [14:7.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmpxchg_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: cmpxchgw %di, %si # sched: [1:0.50] -; SLM-NEXT: cmpxchgw %di, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cmpxchg_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: cmpxchgw %di, %si # sched: [5:1.33] -; SANDY-NEXT: cmpxchgw %di, (%rdx) # sched: [8:2.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cmpxchg_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: cmpxchgw %di, %si # sched: [5:1.25] -; HASWELL-NEXT: cmpxchgw %di, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmpxchg_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: cmpxchgw %di, %si # sched: [5:1.25] -; BROADWELL-NEXT: cmpxchgw %di, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmpxchg_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: cmpxchgw %di, %si # sched: [5:1.25] -; SKYLAKE-NEXT: cmpxchgw %di, (%rdx) # sched: [8:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmpxchg_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: cmpxchgw %di, %si # sched: [5:1.25] -; SKX-NEXT: cmpxchgw %di, (%rdx) # sched: [8:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cmpxchg_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpxchgw %di, %si # sched: [3:1.00] -; BDVER2-NEXT: cmpxchgw %di, (%rdx) # sched: [3:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cmpxchg_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: cmpxchgw %di, %si # sched: [1:0.50] -; BTVER2-NEXT: cmpxchgw %di, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cmpxchg_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: cmpxchgw %di, %si # sched: [1:0.25] -; ZNVER1-NEXT: cmpxchgw %di, (%rdx) # sched: [8:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "cmpxchgw $0, $1 \0a\09 cmpxchgw $0, $2", "r,r,*m"(i16 %a0, i16 %a1, i16 *%a2) nounwind - ret void -} -define void @test_cmpxchg_32(i32 %a0, i32 %a1, i32 *%a2) optsize { -; GENERIC-LABEL: test_cmpxchg_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: cmpxchgl %edi, %esi # sched: [5:1.33] -; GENERIC-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:2.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmpxchg_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: cmpxchgl %edi, %esi # sched: [15:7.50] -; ATOM-NEXT: cmpxchgl %edi, (%rdx) # sched: [14:7.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmpxchg_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: cmpxchgl %edi, %esi # sched: [1:0.50] -; SLM-NEXT: cmpxchgl %edi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cmpxchg_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: cmpxchgl %edi, %esi # sched: [5:1.33] -; SANDY-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:2.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cmpxchg_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: cmpxchgl %edi, %esi # sched: [5:1.25] -; HASWELL-NEXT: cmpxchgl %edi, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmpxchg_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: cmpxchgl %edi, %esi # sched: [5:1.25] -; BROADWELL-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmpxchg_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: cmpxchgl %edi, %esi # sched: [5:1.25] -; SKYLAKE-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmpxchg_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: cmpxchgl %edi, %esi # sched: [5:1.25] -; SKX-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cmpxchg_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpxchgl %edi, %esi # sched: [3:1.00] -; BDVER2-NEXT: cmpxchgl %edi, (%rdx) # sched: [3:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cmpxchg_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: cmpxchgl %edi, %esi # sched: [1:0.50] -; BTVER2-NEXT: cmpxchgl %edi, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cmpxchg_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: cmpxchgl %edi, %esi # sched: [1:0.25] -; ZNVER1-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "cmpxchgl $0, $1 \0a\09 cmpxchgl $0, $2", "r,r,*m"(i32 %a0, i32 %a1, i32 *%a2) nounwind - ret void -} -define void @test_cmpxchg_64(i64 %a0, i64 %a1, i64 *%a2) optsize { -; GENERIC-LABEL: test_cmpxchg_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.33] -; GENERIC-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:2.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmpxchg_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: cmpxchgq %rdi, %rsi # sched: [15:7.50] -; ATOM-NEXT: cmpxchgq %rdi, (%rdx) # sched: [14:7.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmpxchg_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: cmpxchgq %rdi, %rsi # sched: [1:0.50] -; SLM-NEXT: cmpxchgq %rdi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cmpxchg_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.33] -; SANDY-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:2.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cmpxchg_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.25] -; HASWELL-NEXT: cmpxchgq %rdi, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmpxchg_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.25] -; BROADWELL-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmpxchg_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.25] -; SKYLAKE-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmpxchg_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.25] -; SKX-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cmpxchg_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpxchgq %rdi, %rsi # sched: [3:1.00] -; BDVER2-NEXT: cmpxchgq %rdi, (%rdx) # sched: [3:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cmpxchg_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: cmpxchgq %rdi, %rsi # sched: [1:0.50] -; BTVER2-NEXT: cmpxchgq %rdi, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cmpxchg_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: cmpxchgq %rdi, %rsi # sched: [1:0.25] -; ZNVER1-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "cmpxchgq $0, $1 \0a\09 cmpxchgq $0, $2", "r,r,*m"(i64 %a0, i64 %a1, i64 *%a2) nounwind - ret void -} -define void @test_cmpxchg8b_cmpxchg16b(i8 *%a0) optsize { -; GENERIC-LABEL: test_cmpxchg8b_cmpxchg16b: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: cmpxchg8b (%rdi) # sched: [6:1.00] -; GENERIC-NEXT: cmpxchg16b (%rdi) # sched: [6:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmpxchg8b_cmpxchg16b: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: cmpxchg8b (%rdi) # sched: [18:9.00] -; ATOM-NEXT: cmpxchg16b (%rdi) # sched: [22:11.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmpxchg8b_cmpxchg16b: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: cmpxchg8b (%rdi) # sched: [4:2.00] -; SLM-NEXT: cmpxchg16b (%rdi) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cmpxchg8b_cmpxchg16b: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: cmpxchg8b (%rdi) # sched: [6:1.00] -; SANDY-NEXT: cmpxchg16b (%rdi) # sched: [6:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cmpxchg8b_cmpxchg16b: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: cmpxchg8b (%rdi) # sched: [17:2.75] -; HASWELL-NEXT: cmpxchg16b (%rdi) # sched: [22:4.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmpxchg8b_cmpxchg16b: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: cmpxchg8b (%rdi) # sched: [16:2.75] -; BROADWELL-NEXT: cmpxchg16b (%rdi) # sched: [21:4.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmpxchg8b_cmpxchg16b: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: cmpxchg8b (%rdi) # sched: [16:2.75] -; SKYLAKE-NEXT: cmpxchg16b (%rdi) # sched: [23:4.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmpxchg8b_cmpxchg16b: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: cmpxchg8b (%rdi) # sched: [16:2.75] -; SKX-NEXT: cmpxchg16b (%rdi) # sched: [23:4.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cmpxchg8b_cmpxchg16b: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpxchg8b (%rdi) # sched: [3:1.00] -; BDVER2-NEXT: cmpxchg16b (%rdi) # sched: [3:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cmpxchg8b_cmpxchg16b: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: cmpxchg8b (%rdi) # sched: [4:1.00] -; BTVER2-NEXT: cmpxchg16b (%rdi) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cmpxchg8b_cmpxchg16b: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: cmpxchg8b (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: cmpxchg16b (%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "cmpxchg8b $0 \0a\09 cmpxchg16b $0", "*m"(i8 *%a0) nounwind - ret void -} - -define void @test_cpuid() optsize { -; GENERIC-LABEL: test_cpuid: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: cpuid # sched: [100:0.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cpuid: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: cpuid # sched: [121:60.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cpuid: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: cpuid # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_cpuid: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: cpuid # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_cpuid: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: cpuid # sched: [18:2.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cpuid: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: cpuid # sched: [18:2.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cpuid: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: cpuid # sched: [18:2.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cpuid: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: cpuid # sched: [18:2.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_cpuid: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: cpuid # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_cpuid: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: cpuid # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_cpuid: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: cpuid # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "cpuid", ""() nounwind - ret void -} - -define void @test_dec8(i8 %a0, i8* %a1) optsize { -; GENERIC-LABEL: test_dec8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: decb %dil # sched: [1:0.33] -; GENERIC-NEXT: decb (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_dec8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: decb %dil # sched: [1:0.50] -; ATOM-NEXT: decb (%rsi) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_dec8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: decb %dil # sched: [1:0.50] -; SLM-NEXT: decb (%rsi) # sched: [5:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_dec8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: decb %dil # sched: [1:0.33] -; SANDY-NEXT: decb (%rsi) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_dec8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: decb %dil # sched: [1:0.25] -; HASWELL-NEXT: decb (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_dec8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: decb %dil # sched: [1:0.25] -; BROADWELL-NEXT: decb (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_dec8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: decb %dil # sched: [1:0.25] -; SKYLAKE-NEXT: decb (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_dec8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: decb %dil # sched: [1:0.25] -; SKX-NEXT: decb (%rsi) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_dec8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: decb %dil # sched: [1:0.50] -; BDVER2-NEXT: decb (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_dec8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: decb %dil # sched: [1:0.50] -; BTVER2-NEXT: decb (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_dec8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: decb %dil # sched: [1:0.25] -; ZNVER1-NEXT: decb (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "decb $0 \0A\09 decb $1", "r,*m"(i8 %a0, i8* %a1) nounwind - ret void -} -define void @test_dec16(i16 %a0, i16* %a1) optsize { -; GENERIC-LABEL: test_dec16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: decw %di # sched: [1:0.33] -; GENERIC-NEXT: decw (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_dec16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: decw %di # sched: [1:0.50] -; ATOM-NEXT: decw (%rsi) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_dec16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: decw %di # sched: [1:0.50] -; SLM-NEXT: decw (%rsi) # sched: [5:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_dec16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: decw %di # sched: [1:0.33] -; SANDY-NEXT: decw (%rsi) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_dec16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: decw %di # sched: [1:0.25] -; HASWELL-NEXT: decw (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_dec16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: decw %di # sched: [1:0.25] -; BROADWELL-NEXT: decw (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_dec16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: decw %di # sched: [1:0.25] -; SKYLAKE-NEXT: decw (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_dec16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: decw %di # sched: [1:0.25] -; SKX-NEXT: decw (%rsi) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_dec16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: decw %di # sched: [1:0.50] -; BDVER2-NEXT: decw (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_dec16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: decw %di # sched: [1:0.50] -; BTVER2-NEXT: decw (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_dec16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: decw %di # sched: [1:0.25] -; ZNVER1-NEXT: decw (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "decw $0 \0A\09 decw $1", "r,*m"(i16 %a0, i16* %a1) nounwind - ret void -} -define void @test_dec32(i32 %a0, i32* %a1) optsize { -; GENERIC-LABEL: test_dec32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: decl %edi # sched: [1:0.33] -; GENERIC-NEXT: decl (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_dec32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: decl %edi # sched: [1:0.50] -; ATOM-NEXT: decl (%rsi) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_dec32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: decl %edi # sched: [1:0.50] -; SLM-NEXT: decl (%rsi) # sched: [5:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_dec32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: decl %edi # sched: [1:0.33] -; SANDY-NEXT: decl (%rsi) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_dec32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: decl %edi # sched: [1:0.25] -; HASWELL-NEXT: decl (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_dec32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: decl %edi # sched: [1:0.25] -; BROADWELL-NEXT: decl (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_dec32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: decl %edi # sched: [1:0.25] -; SKYLAKE-NEXT: decl (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_dec32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: decl %edi # sched: [1:0.25] -; SKX-NEXT: decl (%rsi) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_dec32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: decl %edi # sched: [1:0.50] -; BDVER2-NEXT: decl (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_dec32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: decl %edi # sched: [1:0.50] -; BTVER2-NEXT: decl (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_dec32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: decl %edi # sched: [1:0.25] -; ZNVER1-NEXT: decl (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "decl $0 \0A\09 decl $1", "r,*m"(i32 %a0, i32* %a1) nounwind - ret void -} -define void @test_dec64(i64 %a0, i64* %a1) optsize { -; GENERIC-LABEL: test_dec64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: decq %rdi # sched: [1:0.33] -; GENERIC-NEXT: decq (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_dec64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: decq %rdi # sched: [1:0.50] -; ATOM-NEXT: decq (%rsi) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_dec64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: decq %rdi # sched: [1:0.50] -; SLM-NEXT: decq (%rsi) # sched: [5:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_dec64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: decq %rdi # sched: [1:0.33] -; SANDY-NEXT: decq (%rsi) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_dec64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: decq %rdi # sched: [1:0.25] -; HASWELL-NEXT: decq (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_dec64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: decq %rdi # sched: [1:0.25] -; BROADWELL-NEXT: decq (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_dec64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: decq %rdi # sched: [1:0.25] -; SKYLAKE-NEXT: decq (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_dec64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: decq %rdi # sched: [1:0.25] -; SKX-NEXT: decq (%rsi) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_dec64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: decq %rdi # sched: [1:0.50] -; BDVER2-NEXT: decq (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_dec64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: decq %rdi # sched: [1:0.50] -; BTVER2-NEXT: decq (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_dec64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: decq %rdi # sched: [1:0.25] -; ZNVER1-NEXT: decq (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "decq $0 \0A\09 decq $1", "r,*m"(i64 %a0, i64* %a1) nounwind - ret void -} - -define void @test_div(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) optsize { -; GENERIC-LABEL: test_div: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; GENERIC-NEXT: #APP -; GENERIC-NEXT: divb %dil # sched: [25:10.00] -; GENERIC-NEXT: divb (%r8) # sched: [30:10.00] -; GENERIC-NEXT: divw %si # sched: [25:10.00] -; GENERIC-NEXT: divw (%r9) # sched: [30:10.00] -; GENERIC-NEXT: divl %edx # sched: [25:10.00] -; GENERIC-NEXT: divl (%rax) # sched: [30:10.00] -; GENERIC-NEXT: divq %rcx # sched: [25:10.00] -; GENERIC-NEXT: divq (%r10) # sched: [30:10.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_div: -; ATOM: # %bb.0: -; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [1:1.00] -; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: divb %dil # sched: [50:25.00] -; ATOM-NEXT: divb (%r8) # sched: [68:34.00] -; ATOM-NEXT: divw %si # sched: [50:25.00] -; ATOM-NEXT: divw (%r9) # sched: [50:25.00] -; ATOM-NEXT: divl %edx # sched: [50:25.00] -; ATOM-NEXT: divl (%rax) # sched: [50:25.00] -; ATOM-NEXT: divq %rcx # sched: [130:65.00] -; ATOM-NEXT: divq (%r10) # sched: [130:65.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_div: -; SLM: # %bb.0: -; SLM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00] -; SLM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: divb %dil # sched: [25:25.00] -; SLM-NEXT: divb (%r8) # sched: [29:25.00] -; SLM-NEXT: divw %si # sched: [25:25.00] -; SLM-NEXT: divw (%r9) # sched: [29:25.00] -; SLM-NEXT: divl %edx # sched: [25:25.00] -; SLM-NEXT: divl (%rax) # sched: [29:25.00] -; SLM-NEXT: divq %rcx # sched: [25:25.00] -; SLM-NEXT: divq (%r10) # sched: [29:25.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_div: -; SANDY: # %bb.0: -; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: divb %dil # sched: [25:10.00] -; SANDY-NEXT: divb (%r8) # sched: [30:10.00] -; SANDY-NEXT: divw %si # sched: [25:10.00] -; SANDY-NEXT: divw (%r9) # sched: [30:10.00] -; SANDY-NEXT: divl %edx # sched: [25:10.00] -; SANDY-NEXT: divl (%rax) # sched: [30:10.00] -; SANDY-NEXT: divq %rcx # sched: [25:10.00] -; SANDY-NEXT: divq (%r10) # sched: [30:10.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_div: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: divb %dil # sched: [22:1.00] -; HASWELL-NEXT: divb (%r8) # sched: [29:10.00] -; HASWELL-NEXT: divw %si # sched: [98:8.00] -; HASWELL-NEXT: divw (%r9) # sched: [29:10.00] -; HASWELL-NEXT: divl %edx # sched: [98:8.00] -; HASWELL-NEXT: divl (%rax) # sched: [29:10.00] -; HASWELL-NEXT: divq %rcx # sched: [98:8.00] -; HASWELL-NEXT: divq (%r10) # sched: [29:10.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_div: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: divb %dil # sched: [25:10.00] -; BROADWELL-NEXT: divb (%r8) # sched: [34:2.00] -; BROADWELL-NEXT: divw %si # sched: [80:8.00] -; BROADWELL-NEXT: divw (%r9) # sched: [34:2.00] -; BROADWELL-NEXT: divl %edx # sched: [80:8.00] -; BROADWELL-NEXT: divl (%rax) # sched: [34:2.00] -; BROADWELL-NEXT: divq %rcx # sched: [80:8.00] -; BROADWELL-NEXT: divq (%r10) # sched: [34:2.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_div: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: divb %dil # sched: [25:10.00] -; SKYLAKE-NEXT: divb (%r8) # sched: [29:10.00] -; SKYLAKE-NEXT: divw %si # sched: [76:8.00] -; SKYLAKE-NEXT: divw (%r9) # sched: [29:10.00] -; SKYLAKE-NEXT: divl %edx # sched: [76:8.00] -; SKYLAKE-NEXT: divl (%rax) # sched: [29:10.00] -; SKYLAKE-NEXT: divq %rcx # sched: [76:8.00] -; SKYLAKE-NEXT: divq (%r10) # sched: [29:10.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_div: -; SKX: # %bb.0: -; SKX-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; SKX-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: divb %dil # sched: [25:10.00] -; SKX-NEXT: divb (%r8) # sched: [29:10.00] -; SKX-NEXT: divw %si # sched: [76:8.00] -; SKX-NEXT: divw (%r9) # sched: [29:10.00] -; SKX-NEXT: divl %edx # sched: [76:8.00] -; SKX-NEXT: divl (%rax) # sched: [29:10.00] -; SKX-NEXT: divq %rcx # sched: [76:8.00] -; SKX-NEXT: divq (%r10) # sched: [29:10.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_div: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: divb %dil # sched: [12:12.00] -; BDVER2-NEXT: divb (%r8) # sched: [16:12.00] -; BDVER2-NEXT: divw %si # sched: [15:15.00] -; BDVER2-NEXT: divw (%r9) # sched: [19:15.00] -; BDVER2-NEXT: divl %edx # sched: [14:14.00] -; BDVER2-NEXT: divl (%rax) # sched: [18:14.00] -; BDVER2-NEXT: divq %rcx # sched: [14:14.00] -; BDVER2-NEXT: divq (%r10) # sched: [18:14.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_div: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00] -; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: divb %dil # sched: [12:12.00] -; BTVER2-NEXT: divb (%r8) # sched: [15:12.00] -; BTVER2-NEXT: divw %si # sched: [17:17.00] -; BTVER2-NEXT: divw (%r9) # sched: [20:17.00] -; BTVER2-NEXT: divl %edx # sched: [25:25.00] -; BTVER2-NEXT: divl (%rax) # sched: [28:25.00] -; BTVER2-NEXT: divq %rcx # sched: [41:41.00] -; BTVER2-NEXT: divq (%r10) # sched: [44:41.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_div: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [8:0.50] -; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: divb %dil # sched: [15:15.00] -; ZNVER1-NEXT: divb (%r8) # sched: [19:15.00] -; ZNVER1-NEXT: divw %si # sched: [17:17.00] -; ZNVER1-NEXT: divw (%r9) # sched: [21:17.00] -; ZNVER1-NEXT: divl %edx # sched: [25:25.00] -; ZNVER1-NEXT: divl (%rax) # sched: [29:25.00] -; ZNVER1-NEXT: divq %rcx # sched: [41:41.00] -; ZNVER1-NEXT: divq (%r10) # sched: [45:41.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "divb $0 \0A\09 divb $4 \0A\09 divw $1 \0A\09 divw $5 \0A\09 divl $2 \0A\09 divl $6 \0A\09 divq $3 \0A\09 divq $7", "r,r,r,r,*m,*m,*m,*m"(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) nounwind - ret void -} - -define void @test_enter() optsize { -; GENERIC-LABEL: test_enter: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: enter $7, $4095 # imm = 0xFFF -; GENERIC-NEXT: # sched: [100:0.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_enter: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: enter $7, $4095 # imm = 0xFFF -; ATOM-NEXT: # sched: [32:16.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_enter: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: enter $7, $4095 # imm = 0xFFF -; SLM-NEXT: # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_enter: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: enter $7, $4095 # imm = 0xFFF -; SANDY-NEXT: # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_enter: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: enter $7, $4095 # imm = 0xFFF -; HASWELL-NEXT: # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_enter: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: enter $7, $4095 # imm = 0xFFF -; BROADWELL-NEXT: # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_enter: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: enter $7, $4095 # imm = 0xFFF -; SKYLAKE-NEXT: # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_enter: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: enter $7, $4095 # imm = 0xFFF -; SKX-NEXT: # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_enter: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: enter $7, $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_enter: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: enter $7, $4095 # imm = 0xFFF -; BTVER2-NEXT: # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_enter: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: enter $7, $4095 # imm = 0xFFF -; ZNVER1-NEXT: # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "enter $0, $1", "i,i"(i8 7, i16 4095) nounwind - ret void -} - -define void @test_idiv(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) optsize { -; GENERIC-LABEL: test_idiv: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; GENERIC-NEXT: #APP -; GENERIC-NEXT: idivb %dil # sched: [25:10.00] -; GENERIC-NEXT: idivb (%r8) # sched: [30:10.00] -; GENERIC-NEXT: idivw %si # sched: [25:10.00] -; GENERIC-NEXT: idivw (%r9) # sched: [30:10.00] -; GENERIC-NEXT: idivl %edx # sched: [25:10.00] -; GENERIC-NEXT: idivl (%rax) # sched: [30:10.00] -; GENERIC-NEXT: idivq %rcx # sched: [25:10.00] -; GENERIC-NEXT: idivq (%r10) # sched: [30:10.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_idiv: -; ATOM: # %bb.0: -; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [1:1.00] -; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: idivb %dil # sched: [62:31.00] -; ATOM-NEXT: idivb (%r8) # sched: [62:31.00] -; ATOM-NEXT: idivw %si # sched: [62:31.00] -; ATOM-NEXT: idivw (%r9) # sched: [62:31.00] -; ATOM-NEXT: idivl %edx # sched: [62:31.00] -; ATOM-NEXT: idivl (%rax) # sched: [62:31.00] -; ATOM-NEXT: idivq %rcx # sched: [130:65.00] -; ATOM-NEXT: idivq (%r10) # sched: [130:65.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_idiv: -; SLM: # %bb.0: -; SLM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00] -; SLM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: idivb %dil # sched: [25:25.00] -; SLM-NEXT: idivb (%r8) # sched: [29:25.00] -; SLM-NEXT: idivw %si # sched: [25:25.00] -; SLM-NEXT: idivw (%r9) # sched: [29:25.00] -; SLM-NEXT: idivl %edx # sched: [25:25.00] -; SLM-NEXT: idivl (%rax) # sched: [29:25.00] -; SLM-NEXT: idivq %rcx # sched: [25:25.00] -; SLM-NEXT: idivq (%r10) # sched: [29:25.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_idiv: -; SANDY: # %bb.0: -; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: idivb %dil # sched: [25:10.00] -; SANDY-NEXT: idivb (%r8) # sched: [30:10.00] -; SANDY-NEXT: idivw %si # sched: [25:10.00] -; SANDY-NEXT: idivw (%r9) # sched: [30:10.00] -; SANDY-NEXT: idivl %edx # sched: [25:10.00] -; SANDY-NEXT: idivl (%rax) # sched: [30:10.00] -; SANDY-NEXT: idivq %rcx # sched: [25:10.00] -; SANDY-NEXT: idivq (%r10) # sched: [30:10.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_idiv: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: idivb %dil # sched: [23:1.00] -; HASWELL-NEXT: idivb (%r8) # sched: [29:10.00] -; HASWELL-NEXT: idivw %si # sched: [112:16.50] -; HASWELL-NEXT: idivw (%r9) # sched: [29:10.00] -; HASWELL-NEXT: idivl %edx # sched: [112:16.50] -; HASWELL-NEXT: idivl (%rax) # sched: [29:10.00] -; HASWELL-NEXT: idivq %rcx # sched: [112:16.50] -; HASWELL-NEXT: idivq (%r10) # sched: [29:10.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_idiv: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: idivb %dil # sched: [25:10.00] -; BROADWELL-NEXT: idivb (%r8) # sched: [35:2.00] -; BROADWELL-NEXT: idivw %si # sched: [25:10.00] -; BROADWELL-NEXT: idivw (%r9) # sched: [35:2.00] -; BROADWELL-NEXT: idivl %edx # sched: [25:10.00] -; BROADWELL-NEXT: idivl (%rax) # sched: [35:2.00] -; BROADWELL-NEXT: idivq %rcx # sched: [25:10.00] -; BROADWELL-NEXT: idivq (%r10) # sched: [35:2.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_idiv: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: idivb %dil # sched: [25:10.00] -; SKYLAKE-NEXT: idivb (%r8) # sched: [28:4.00] -; SKYLAKE-NEXT: idivw %si # sched: [102:16.50] -; SKYLAKE-NEXT: idivw (%r9) # sched: [28:4.00] -; SKYLAKE-NEXT: idivl %edx # sched: [102:16.50] -; SKYLAKE-NEXT: idivl (%rax) # sched: [28:4.00] -; SKYLAKE-NEXT: idivq %rcx # sched: [102:16.50] -; SKYLAKE-NEXT: idivq (%r10) # sched: [28:4.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_idiv: -; SKX: # %bb.0: -; SKX-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; SKX-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: idivb %dil # sched: [25:10.00] -; SKX-NEXT: idivb (%r8) # sched: [28:4.00] -; SKX-NEXT: idivw %si # sched: [102:16.50] -; SKX-NEXT: idivw (%r9) # sched: [28:4.00] -; SKX-NEXT: idivl %edx # sched: [102:16.50] -; SKX-NEXT: idivl (%rax) # sched: [28:4.00] -; SKX-NEXT: idivq %rcx # sched: [102:16.50] -; SKX-NEXT: idivq (%r10) # sched: [28:4.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_idiv: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: idivb %dil # sched: [12:12.00] -; BDVER2-NEXT: idivb (%r8) # sched: [16:12.00] -; BDVER2-NEXT: idivw %si # sched: [15:17.00] -; BDVER2-NEXT: idivw (%r9) # sched: [19:17.00] -; BDVER2-NEXT: idivl %edx # sched: [14:25.00] -; BDVER2-NEXT: idivl (%rax) # sched: [18:25.00] -; BDVER2-NEXT: idivq %rcx # sched: [14:14.00] -; BDVER2-NEXT: idivq (%r10) # sched: [18:14.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_idiv: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00] -; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: idivb %dil # sched: [12:12.00] -; BTVER2-NEXT: idivb (%r8) # sched: [15:12.00] -; BTVER2-NEXT: idivw %si # sched: [17:17.00] -; BTVER2-NEXT: idivw (%r9) # sched: [20:17.00] -; BTVER2-NEXT: idivl %edx # sched: [25:25.00] -; BTVER2-NEXT: idivl (%rax) # sched: [28:25.00] -; BTVER2-NEXT: idivq %rcx # sched: [41:41.00] -; BTVER2-NEXT: idivq (%r10) # sched: [44:41.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_idiv: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [8:0.50] -; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: idivb %dil # sched: [15:15.00] -; ZNVER1-NEXT: idivb (%r8) # sched: [19:15.00] -; ZNVER1-NEXT: idivw %si # sched: [17:17.00] -; ZNVER1-NEXT: idivw (%r9) # sched: [21:17.00] -; ZNVER1-NEXT: idivl %edx # sched: [25:25.00] -; ZNVER1-NEXT: idivl (%rax) # sched: [29:25.00] -; ZNVER1-NEXT: idivq %rcx # sched: [41:41.00] -; ZNVER1-NEXT: idivq (%r10) # sched: [45:41.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "idivb $0 \0A\09 idivb $4 \0A\09 idivw $1 \0A\09 idivw $5 \0A\09 idivl $2 \0A\09 idivl $6 \0A\09 idivq $3 \0A\09 idivq $7", "r,r,r,r,*m,*m,*m,*m"(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) nounwind - ret void -} - -define void @test_imul_8(i8 %a0, i8* %a1) optsize { -; GENERIC-LABEL: test_imul_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: imulb %dil # sched: [3:1.00] -; GENERIC-NEXT: imulb (%rsi) # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_imul_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: imulb %dil # sched: [7:3.50] -; ATOM-NEXT: imulb (%rsi) # sched: [7:3.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_imul_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: imulb %dil # sched: [3:1.00] -; SLM-NEXT: imulb (%rsi) # sched: [6:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_imul_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: imulb %dil # sched: [3:1.00] -; SANDY-NEXT: imulb (%rsi) # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_imul_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: imulb %dil # sched: [3:1.00] -; HASWELL-NEXT: imulb (%rsi) # sched: [8:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_imul_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: imulb %dil # sched: [3:1.00] -; BROADWELL-NEXT: imulb (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_imul_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: imulb %dil # sched: [3:1.00] -; SKYLAKE-NEXT: imulb (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_imul_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: imulb %dil # sched: [3:1.00] -; SKX-NEXT: imulb (%rsi) # sched: [8:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_imul_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: imulb %dil # sched: [4:1.00] -; BDVER2-NEXT: imulb (%rsi) # sched: [8:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_imul_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: imulb %dil # sched: [3:1.00] -; BTVER2-NEXT: imulb (%rsi) # sched: [6:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_imul_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: imulb %dil # sched: [4:1.00] -; ZNVER1-NEXT: imulb (%rsi) # sched: [8:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "imulb $0 \0A\09 imulb $1", "r,*m"(i8 %a0, i8* %a1) nounwind - ret void -} -define void @test_imul_16(i16 %a0, i16* %a1, i16 %a2) optsize { -; GENERIC-LABEL: test_imul_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: imulw %di # sched: [4:1.33] -; GENERIC-NEXT: imulw (%rsi) # sched: [9:1.33] -; GENERIC-NEXT: imulw %dx, %di # sched: [3:1.00] -; GENERIC-NEXT: imulw (%rsi), %di # sched: [8:1.00] -; GENERIC-NEXT: imulw $511, %di, %di # imm = 0x1FF -; GENERIC-NEXT: # sched: [4:1.00] -; GENERIC-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF -; GENERIC-NEXT: # sched: [8:1.00] -; GENERIC-NEXT: imulw $7, %di, %di # sched: [4:1.00] -; GENERIC-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_imul_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: imulw %di # sched: [7:3.50] -; ATOM-NEXT: imulw (%rsi) # sched: [8:4.00] -; ATOM-NEXT: imulw %dx, %di # sched: [6:3.00] -; ATOM-NEXT: imulw (%rsi), %di # sched: [7:3.50] -; ATOM-NEXT: imulw $511, %di, %di # imm = 0x1FF -; ATOM-NEXT: # sched: [6:3.00] -; ATOM-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF -; ATOM-NEXT: # sched: [7:3.50] -; ATOM-NEXT: imulw $7, %di, %di # sched: [6:3.00] -; ATOM-NEXT: imulw $7, (%rsi), %di # sched: [7:3.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_imul_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: imulw %di # sched: [3:1.00] -; SLM-NEXT: imulw (%rsi) # sched: [6:1.00] -; SLM-NEXT: imulw %dx, %di # sched: [3:1.00] -; SLM-NEXT: imulw (%rsi), %di # sched: [6:1.00] -; SLM-NEXT: imulw $511, %di, %di # imm = 0x1FF -; SLM-NEXT: # sched: [3:1.00] -; SLM-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF -; SLM-NEXT: # sched: [6:1.00] -; SLM-NEXT: imulw $7, %di, %di # sched: [3:1.00] -; SLM-NEXT: imulw $7, (%rsi), %di # sched: [6:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_imul_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: imulw %di # sched: [4:1.33] -; SANDY-NEXT: imulw (%rsi) # sched: [9:1.33] -; SANDY-NEXT: imulw %dx, %di # sched: [3:1.00] -; SANDY-NEXT: imulw (%rsi), %di # sched: [8:1.00] -; SANDY-NEXT: imulw $511, %di, %di # imm = 0x1FF -; SANDY-NEXT: # sched: [4:1.00] -; SANDY-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF -; SANDY-NEXT: # sched: [8:1.00] -; SANDY-NEXT: imulw $7, %di, %di # sched: [4:1.00] -; SANDY-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_imul_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: imulw %di # sched: [4:1.00] -; HASWELL-NEXT: imulw (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: imulw %dx, %di # sched: [3:1.00] -; HASWELL-NEXT: imulw (%rsi), %di # sched: [8:1.00] -; HASWELL-NEXT: imulw $511, %di, %di # imm = 0x1FF -; HASWELL-NEXT: # sched: [4:1.00] -; HASWELL-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF -; HASWELL-NEXT: # sched: [8:1.00] -; HASWELL-NEXT: imulw $7, %di, %di # sched: [4:1.00] -; HASWELL-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_imul_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: imulw %di # sched: [4:1.00] -; BROADWELL-NEXT: imulw (%rsi) # sched: [9:1.00] -; BROADWELL-NEXT: imulw %dx, %di # sched: [3:1.00] -; BROADWELL-NEXT: imulw (%rsi), %di # sched: [8:1.00] -; BROADWELL-NEXT: imulw $511, %di, %di # imm = 0x1FF -; BROADWELL-NEXT: # sched: [4:1.00] -; BROADWELL-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF -; BROADWELL-NEXT: # sched: [8:1.00] -; BROADWELL-NEXT: imulw $7, %di, %di # sched: [4:1.00] -; BROADWELL-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_imul_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: imulw %di # sched: [4:1.00] -; SKYLAKE-NEXT: imulw (%rsi) # sched: [9:1.00] -; SKYLAKE-NEXT: imulw %dx, %di # sched: [3:1.00] -; SKYLAKE-NEXT: imulw (%rsi), %di # sched: [8:1.00] -; SKYLAKE-NEXT: imulw $511, %di, %di # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [4:1.00] -; SKYLAKE-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [8:1.00] -; SKYLAKE-NEXT: imulw $7, %di, %di # sched: [4:1.00] -; SKYLAKE-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_imul_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: imulw %di # sched: [4:1.00] -; SKX-NEXT: imulw (%rsi) # sched: [9:1.00] -; SKX-NEXT: imulw %dx, %di # sched: [3:1.00] -; SKX-NEXT: imulw (%rsi), %di # sched: [8:1.00] -; SKX-NEXT: imulw $511, %di, %di # imm = 0x1FF -; SKX-NEXT: # sched: [4:1.00] -; SKX-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF -; SKX-NEXT: # sched: [8:1.00] -; SKX-NEXT: imulw $7, %di, %di # sched: [4:1.00] -; SKX-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_imul_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: imulw %di # sched: [4:1.00] -; BDVER2-NEXT: imulw (%rsi) # sched: [8:1.00] -; BDVER2-NEXT: imulw %dx, %di # sched: [4:1.00] -; BDVER2-NEXT: imulw (%rsi), %di # sched: [8:1.00] -; BDVER2-NEXT: imulw $511, %di, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [5:1.00] -; BDVER2-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [9:1.00] -; BDVER2-NEXT: imulw $7, %di, %di # sched: [5:1.00] -; BDVER2-NEXT: imulw $7, (%rsi), %di # sched: [9:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_imul_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: imulw %di # sched: [3:1.00] -; BTVER2-NEXT: imulw (%rsi) # sched: [6:1.00] -; BTVER2-NEXT: imulw %dx, %di # sched: [3:1.00] -; BTVER2-NEXT: imulw (%rsi), %di # sched: [6:1.00] -; BTVER2-NEXT: imulw $511, %di, %di # imm = 0x1FF -; BTVER2-NEXT: # sched: [3:1.00] -; BTVER2-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF -; BTVER2-NEXT: # sched: [6:1.00] -; BTVER2-NEXT: imulw $7, %di, %di # sched: [3:1.00] -; BTVER2-NEXT: imulw $7, (%rsi), %di # sched: [6:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_imul_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: imulw %di # sched: [3:1.00] -; ZNVER1-NEXT: imulw (%rsi) # sched: [8:1.00] -; ZNVER1-NEXT: imulw %dx, %di # sched: [3:1.00] -; ZNVER1-NEXT: imulw (%rsi), %di # sched: [3:1.00] -; ZNVER1-NEXT: imulw $511, %di, %di # imm = 0x1FF -; ZNVER1-NEXT: # sched: [3:1.00] -; ZNVER1-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF -; ZNVER1-NEXT: # sched: [3:1.00] -; ZNVER1-NEXT: imulw $7, %di, %di # sched: [3:1.00] -; ZNVER1-NEXT: imulw $7, (%rsi), %di # sched: [3:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "imulw $0 \0A\09 imulw $2 \0A\09 imulw $1, $0 \0A\09 imulw $2, $0 \0A\09 imulw $3, $0, $0 \0A\09 imulw $3, $2, $0 \0A\09 imulw $4, $0, $0 \0A\09 imulw $4, $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind - ret void -} -define void @test_imul_32(i32 %a0, i32* %a1, i32 %a2) optsize { -; GENERIC-LABEL: test_imul_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: imull %edi # sched: [4:1.00] -; GENERIC-NEXT: imull (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: imull %edx, %edi # sched: [3:1.00] -; GENERIC-NEXT: imull (%rsi), %edi # sched: [8:1.00] -; GENERIC-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [3:1.00] -; GENERIC-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [8:1.00] -; GENERIC-NEXT: imull $7, %edi, %edi # sched: [3:1.00] -; GENERIC-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_imul_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: imull %edi # sched: [6:3.00] -; ATOM-NEXT: imull (%rsi) # sched: [7:3.50] -; ATOM-NEXT: imull %edx, %edi # sched: [5:5.00] -; ATOM-NEXT: imull (%rsi), %edi # sched: [5:5.00] -; ATOM-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [5:5.00] -; ATOM-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [5:5.00] -; ATOM-NEXT: imull $7, %edi, %edi # sched: [5:5.00] -; ATOM-NEXT: imull $7, (%rsi), %edi # sched: [5:5.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_imul_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: imull %edi # sched: [3:1.00] -; SLM-NEXT: imull (%rsi) # sched: [6:1.00] -; SLM-NEXT: imull %edx, %edi # sched: [3:1.00] -; SLM-NEXT: imull (%rsi), %edi # sched: [6:1.00] -; SLM-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 -; SLM-NEXT: # sched: [3:1.00] -; SLM-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 -; SLM-NEXT: # sched: [6:1.00] -; SLM-NEXT: imull $7, %edi, %edi # sched: [3:1.00] -; SLM-NEXT: imull $7, (%rsi), %edi # sched: [6:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_imul_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: imull %edi # sched: [4:1.00] -; SANDY-NEXT: imull (%rsi) # sched: [9:1.00] -; SANDY-NEXT: imull %edx, %edi # sched: [3:1.00] -; SANDY-NEXT: imull (%rsi), %edi # sched: [8:1.00] -; SANDY-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [3:1.00] -; SANDY-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [8:1.00] -; SANDY-NEXT: imull $7, %edi, %edi # sched: [3:1.00] -; SANDY-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_imul_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: imull %edi # sched: [4:1.00] -; HASWELL-NEXT: imull (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: imull %edx, %edi # sched: [3:1.00] -; HASWELL-NEXT: imull (%rsi), %edi # sched: [8:1.00] -; HASWELL-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [3:1.00] -; HASWELL-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [8:1.00] -; HASWELL-NEXT: imull $7, %edi, %edi # sched: [3:1.00] -; HASWELL-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_imul_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: imull %edi # sched: [4:1.00] -; BROADWELL-NEXT: imull (%rsi) # sched: [9:1.00] -; BROADWELL-NEXT: imull %edx, %edi # sched: [3:1.00] -; BROADWELL-NEXT: imull (%rsi), %edi # sched: [8:1.00] -; BROADWELL-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [3:1.00] -; BROADWELL-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [8:1.00] -; BROADWELL-NEXT: imull $7, %edi, %edi # sched: [3:1.00] -; BROADWELL-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_imul_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: imull %edi # sched: [4:1.00] -; SKYLAKE-NEXT: imull (%rsi) # sched: [9:1.00] -; SKYLAKE-NEXT: imull %edx, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: imull (%rsi), %edi # sched: [8:1.00] -; SKYLAKE-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [3:1.00] -; SKYLAKE-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [8:1.00] -; SKYLAKE-NEXT: imull $7, %edi, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_imul_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: imull %edi # sched: [4:1.00] -; SKX-NEXT: imull (%rsi) # sched: [9:1.00] -; SKX-NEXT: imull %edx, %edi # sched: [3:1.00] -; SKX-NEXT: imull (%rsi), %edi # sched: [8:1.00] -; SKX-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 -; SKX-NEXT: # sched: [3:1.00] -; SKX-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 -; SKX-NEXT: # sched: [8:1.00] -; SKX-NEXT: imull $7, %edi, %edi # sched: [3:1.00] -; SKX-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_imul_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: imull %edi # sched: [4:1.00] -; BDVER2-NEXT: imull (%rsi) # sched: [8:1.00] -; BDVER2-NEXT: imull %edx, %edi # sched: [4:1.00] -; BDVER2-NEXT: imull (%rsi), %edi # sched: [8:1.00] -; BDVER2-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [4:1.00] -; BDVER2-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [8:1.00] -; BDVER2-NEXT: imull $7, %edi, %edi # sched: [4:1.00] -; BDVER2-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_imul_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: imull %edi # sched: [3:1.00] -; BTVER2-NEXT: imull (%rsi) # sched: [6:1.00] -; BTVER2-NEXT: imull %edx, %edi # sched: [3:1.00] -; BTVER2-NEXT: imull (%rsi), %edi # sched: [6:1.00] -; BTVER2-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [3:1.00] -; BTVER2-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [6:1.00] -; BTVER2-NEXT: imull $7, %edi, %edi # sched: [3:1.00] -; BTVER2-NEXT: imull $7, (%rsi), %edi # sched: [6:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_imul_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: imull %edi # sched: [3:1.00] -; ZNVER1-NEXT: imull (%rsi) # sched: [8:1.00] -; ZNVER1-NEXT: imull %edx, %edi # sched: [3:1.00] -; ZNVER1-NEXT: imull (%rsi), %edi # sched: [3:1.00] -; ZNVER1-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [3:1.00] -; ZNVER1-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [3:1.00] -; ZNVER1-NEXT: imull $7, %edi, %edi # sched: [3:1.00] -; ZNVER1-NEXT: imull $7, (%rsi), %edi # sched: [3:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "imull $0 \0A\09 imull $2 \0A\09 imull $1, $0 \0A\09 imull $2, $0 \0A\09 imull $3, $0, $0 \0A\09 imull $3, $2, $0 \0A\09 imull $4, $0, $0 \0A\09 imull $4, $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind - ret void -} -define void @test_imul_64(i64 %a0, i64* %a1, i64 %a2) optsize { -; GENERIC-LABEL: test_imul_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: imulq %rdi # sched: [4:1.00] -; GENERIC-NEXT: imulq (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: imulq %rdx, %rdi # sched: [3:1.00] -; GENERIC-NEXT: imulq (%rsi), %rdi # sched: [8:1.00] -; GENERIC-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [3:1.00] -; GENERIC-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [8:1.00] -; GENERIC-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00] -; GENERIC-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_imul_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: imulq %rdi # sched: [12:6.00] -; ATOM-NEXT: imulq (%rsi) # sched: [12:6.00] -; ATOM-NEXT: imulq %rdx, %rdi # sched: [12:6.00] -; ATOM-NEXT: imulq (%rsi), %rdi # sched: [12:6.00] -; ATOM-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [14:7.00] -; ATOM-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [14:7.00] -; ATOM-NEXT: imulq $7, %rdi, %rdi # sched: [14:7.00] -; ATOM-NEXT: imulq $7, (%rsi), %rdi # sched: [14:7.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_imul_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: imulq %rdi # sched: [3:1.00] -; SLM-NEXT: imulq (%rsi) # sched: [6:1.00] -; SLM-NEXT: imulq %rdx, %rdi # sched: [3:1.00] -; SLM-NEXT: imulq (%rsi), %rdi # sched: [6:1.00] -; SLM-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 -; SLM-NEXT: # sched: [3:1.00] -; SLM-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0 -; SLM-NEXT: # sched: [6:1.00] -; SLM-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00] -; SLM-NEXT: imulq $7, (%rsi), %rdi # sched: [6:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_imul_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: imulq %rdi # sched: [4:1.00] -; SANDY-NEXT: imulq (%rsi) # sched: [9:1.00] -; SANDY-NEXT: imulq %rdx, %rdi # sched: [3:1.00] -; SANDY-NEXT: imulq (%rsi), %rdi # sched: [8:1.00] -; SANDY-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [3:1.00] -; SANDY-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [8:1.00] -; SANDY-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00] -; SANDY-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_imul_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: imulq %rdi # sched: [4:1.00] -; HASWELL-NEXT: imulq (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: imulq %rdx, %rdi # sched: [3:1.00] -; HASWELL-NEXT: imulq (%rsi), %rdi # sched: [8:1.00] -; HASWELL-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [3:1.00] -; HASWELL-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [8:1.00] -; HASWELL-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00] -; HASWELL-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_imul_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: imulq %rdi # sched: [4:1.00] -; BROADWELL-NEXT: imulq (%rsi) # sched: [9:1.00] -; BROADWELL-NEXT: imulq %rdx, %rdi # sched: [3:1.00] -; BROADWELL-NEXT: imulq (%rsi), %rdi # sched: [8:1.00] -; BROADWELL-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [3:1.00] -; BROADWELL-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [8:1.00] -; BROADWELL-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00] -; BROADWELL-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_imul_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: imulq %rdi # sched: [4:1.00] -; SKYLAKE-NEXT: imulq (%rsi) # sched: [9:1.00] -; SKYLAKE-NEXT: imulq %rdx, %rdi # sched: [3:1.00] -; SKYLAKE-NEXT: imulq (%rsi), %rdi # sched: [8:1.00] -; SKYLAKE-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [3:1.00] -; SKYLAKE-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [8:1.00] -; SKYLAKE-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00] -; SKYLAKE-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_imul_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: imulq %rdi # sched: [4:1.00] -; SKX-NEXT: imulq (%rsi) # sched: [9:1.00] -; SKX-NEXT: imulq %rdx, %rdi # sched: [3:1.00] -; SKX-NEXT: imulq (%rsi), %rdi # sched: [8:1.00] -; SKX-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 -; SKX-NEXT: # sched: [3:1.00] -; SKX-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0 -; SKX-NEXT: # sched: [8:1.00] -; SKX-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00] -; SKX-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_imul_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: imulq %rdi # sched: [6:4.00] -; BDVER2-NEXT: imulq (%rsi) # sched: [10:4.00] -; BDVER2-NEXT: imulq %rdx, %rdi # sched: [6:4.00] -; BDVER2-NEXT: imulq (%rsi), %rdi # sched: [10:4.00] -; BDVER2-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:4.00] -; BDVER2-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [10:4.00] -; BDVER2-NEXT: imulq $7, %rdi, %rdi # sched: [6:4.00] -; BDVER2-NEXT: imulq $7, (%rsi), %rdi # sched: [10:4.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_imul_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: imulq %rdi # sched: [6:4.00] -; BTVER2-NEXT: imulq (%rsi) # sched: [9:4.00] -; BTVER2-NEXT: imulq %rdx, %rdi # sched: [6:4.00] -; BTVER2-NEXT: imulq (%rsi), %rdi # sched: [9:4.00] -; BTVER2-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [6:4.00] -; BTVER2-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [9:4.00] -; BTVER2-NEXT: imulq $7, %rdi, %rdi # sched: [6:4.00] -; BTVER2-NEXT: imulq $7, (%rsi), %rdi # sched: [9:4.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_imul_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: imulq %rdi # sched: [4:1.00] -; ZNVER1-NEXT: imulq (%rsi) # sched: [9:1.00] -; ZNVER1-NEXT: imulq %rdx, %rdi # sched: [4:1.00] -; ZNVER1-NEXT: imulq (%rsi), %rdi # sched: [4:1.00] -; ZNVER1-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [4:1.00] -; ZNVER1-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [4:1.00] -; ZNVER1-NEXT: imulq $7, %rdi, %rdi # sched: [4:1.00] -; ZNVER1-NEXT: imulq $7, (%rsi), %rdi # sched: [4:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "imulq $0 \0A\09 imulq $2 \0A\09 imulq $1, $0 \0A\09 imulq $2, $0 \0A\09 imulq $3, $0, $0 \0A\09 imulq $3, $2, $0 \0A\09 imulq $4, $0, $0 \0A\09 imulq $4, $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind - ret void -} - -define void @test_in() optsize { -; GENERIC-LABEL: test_in: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: inb $7, %al # sched: [100:0.33] -; GENERIC-NEXT: inw $7, %ax # sched: [100:0.33] -; GENERIC-NEXT: inl $7, %eax # sched: [100:0.33] -; GENERIC-NEXT: inb %dx, %al # sched: [100:0.33] -; GENERIC-NEXT: inw %dx, %ax # sched: [100:0.33] -; GENERIC-NEXT: inl %dx, %eax # sched: [100:0.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_in: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: inb $7, %al # sched: [92:46.00] -; ATOM-NEXT: inw $7, %ax # sched: [92:46.00] -; ATOM-NEXT: inl $7, %eax # sched: [92:46.00] -; ATOM-NEXT: inb %dx, %al # sched: [94:47.00] -; ATOM-NEXT: inw %dx, %ax # sched: [94:47.00] -; ATOM-NEXT: inl %dx, %eax # sched: [94:47.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_in: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: inb $7, %al # sched: [100:1.00] -; SLM-NEXT: inw $7, %ax # sched: [100:1.00] -; SLM-NEXT: inl $7, %eax # sched: [100:1.00] -; SLM-NEXT: inb %dx, %al # sched: [100:1.00] -; SLM-NEXT: inw %dx, %ax # sched: [100:1.00] -; SLM-NEXT: inl %dx, %eax # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_in: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: inb $7, %al # sched: [100:0.33] -; SANDY-NEXT: inw $7, %ax # sched: [100:0.33] -; SANDY-NEXT: inl $7, %eax # sched: [100:0.33] -; SANDY-NEXT: inb %dx, %al # sched: [100:0.33] -; SANDY-NEXT: inw %dx, %ax # sched: [100:0.33] -; SANDY-NEXT: inl %dx, %eax # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_in: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: inb $7, %al # sched: [35:5.00] -; HASWELL-NEXT: inw $7, %ax # sched: [35:5.00] -; HASWELL-NEXT: inl $7, %eax # sched: [35:5.00] -; HASWELL-NEXT: inb %dx, %al # sched: [35:5.00] -; HASWELL-NEXT: inw %dx, %ax # sched: [35:5.00] -; HASWELL-NEXT: inl %dx, %eax # sched: [35:5.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_in: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: inb $7, %al # sched: [34:5.00] -; BROADWELL-NEXT: inw $7, %ax # sched: [34:5.00] -; BROADWELL-NEXT: inl $7, %eax # sched: [34:5.00] -; BROADWELL-NEXT: inb %dx, %al # sched: [34:5.00] -; BROADWELL-NEXT: inw %dx, %ax # sched: [34:5.00] -; BROADWELL-NEXT: inl %dx, %eax # sched: [34:5.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_in: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: inb $7, %al # sched: [35:5.00] -; SKYLAKE-NEXT: inw $7, %ax # sched: [35:5.00] -; SKYLAKE-NEXT: inl $7, %eax # sched: [35:5.00] -; SKYLAKE-NEXT: inb %dx, %al # sched: [35:5.00] -; SKYLAKE-NEXT: inw %dx, %ax # sched: [35:5.00] -; SKYLAKE-NEXT: inl %dx, %eax # sched: [35:5.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_in: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: inb $7, %al # sched: [35:5.00] -; SKX-NEXT: inw $7, %ax # sched: [35:5.00] -; SKX-NEXT: inl $7, %eax # sched: [35:5.00] -; SKX-NEXT: inb %dx, %al # sched: [35:5.00] -; SKX-NEXT: inw %dx, %ax # sched: [35:5.00] -; SKX-NEXT: inl %dx, %eax # sched: [35:5.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_in: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: inb $7, %al # sched: [100:0.50] -; BDVER2-NEXT: inw $7, %ax # sched: [100:0.50] -; BDVER2-NEXT: inl $7, %eax # sched: [100:0.50] -; BDVER2-NEXT: inb %dx, %al # sched: [100:0.50] -; BDVER2-NEXT: inw %dx, %ax # sched: [100:0.50] -; BDVER2-NEXT: inl %dx, %eax # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_in: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: inb $7, %al # sched: [100:0.50] -; BTVER2-NEXT: inw $7, %ax # sched: [100:0.50] -; BTVER2-NEXT: inl $7, %eax # sched: [100:0.50] -; BTVER2-NEXT: inb %dx, %al # sched: [100:0.50] -; BTVER2-NEXT: inw %dx, %ax # sched: [100:0.50] -; BTVER2-NEXT: inl %dx, %eax # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_in: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: inb $7, %al # sched: [100:0.25] -; ZNVER1-NEXT: inw $7, %ax # sched: [100:0.25] -; ZNVER1-NEXT: inl $7, %eax # sched: [100:0.25] -; ZNVER1-NEXT: inb %dx, %al # sched: [100:0.25] -; ZNVER1-NEXT: inw %dx, %ax # sched: [100:0.25] -; ZNVER1-NEXT: inl %dx, %eax # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "inb $0, %AL \0A\09 inw $0, %AX \0A\09 inl $0, %EAX \0A\09 inb %DX, %AL \0A\09 inw %DX, %AX \0A\09 inl %DX, %EAX", "i"(i8 7) nounwind - ret void -} - -define void @test_inc8(i8 %a0, i8* %a1) optsize { -; GENERIC-LABEL: test_inc8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: incb %dil # sched: [1:0.33] -; GENERIC-NEXT: incb (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_inc8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: incb %dil # sched: [1:0.50] -; ATOM-NEXT: incb (%rsi) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_inc8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: incb %dil # sched: [1:0.50] -; SLM-NEXT: incb (%rsi) # sched: [5:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_inc8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: incb %dil # sched: [1:0.33] -; SANDY-NEXT: incb (%rsi) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_inc8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: incb %dil # sched: [1:0.25] -; HASWELL-NEXT: incb (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_inc8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: incb %dil # sched: [1:0.25] -; BROADWELL-NEXT: incb (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_inc8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: incb %dil # sched: [1:0.25] -; SKYLAKE-NEXT: incb (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_inc8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: incb %dil # sched: [1:0.25] -; SKX-NEXT: incb (%rsi) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_inc8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: incb %dil # sched: [1:0.50] -; BDVER2-NEXT: incb (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_inc8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: incb %dil # sched: [1:0.50] -; BTVER2-NEXT: incb (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_inc8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: incb %dil # sched: [1:0.25] -; ZNVER1-NEXT: incb (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "incb $0 \0A\09 incb $1", "r,*m"(i8 %a0, i8* %a1) nounwind - ret void -} -define void @test_inc16(i16 %a0, i16* %a1) optsize { -; GENERIC-LABEL: test_inc16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: incw %di # sched: [1:0.33] -; GENERIC-NEXT: incw (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_inc16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: incw %di # sched: [1:0.50] -; ATOM-NEXT: incw (%rsi) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_inc16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: incw %di # sched: [1:0.50] -; SLM-NEXT: incw (%rsi) # sched: [5:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_inc16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: incw %di # sched: [1:0.33] -; SANDY-NEXT: incw (%rsi) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_inc16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: incw %di # sched: [1:0.25] -; HASWELL-NEXT: incw (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_inc16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: incw %di # sched: [1:0.25] -; BROADWELL-NEXT: incw (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_inc16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: incw %di # sched: [1:0.25] -; SKYLAKE-NEXT: incw (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_inc16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: incw %di # sched: [1:0.25] -; SKX-NEXT: incw (%rsi) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_inc16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: incw %di # sched: [1:0.50] -; BDVER2-NEXT: incw (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_inc16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: incw %di # sched: [1:0.50] -; BTVER2-NEXT: incw (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_inc16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: incw %di # sched: [1:0.25] -; ZNVER1-NEXT: incw (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "incw $0 \0A\09 incw $1", "r,*m"(i16 %a0, i16* %a1) nounwind - ret void -} -define void @test_inc32(i32 %a0, i32* %a1) optsize { -; GENERIC-LABEL: test_inc32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: incl %edi # sched: [1:0.33] -; GENERIC-NEXT: incl (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_inc32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: incl %edi # sched: [1:0.50] -; ATOM-NEXT: incl (%rsi) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_inc32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: incl %edi # sched: [1:0.50] -; SLM-NEXT: incl (%rsi) # sched: [5:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_inc32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: incl %edi # sched: [1:0.33] -; SANDY-NEXT: incl (%rsi) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_inc32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: incl %edi # sched: [1:0.25] -; HASWELL-NEXT: incl (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_inc32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: incl %edi # sched: [1:0.25] -; BROADWELL-NEXT: incl (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_inc32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: incl %edi # sched: [1:0.25] -; SKYLAKE-NEXT: incl (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_inc32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: incl %edi # sched: [1:0.25] -; SKX-NEXT: incl (%rsi) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_inc32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: incl %edi # sched: [1:0.50] -; BDVER2-NEXT: incl (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_inc32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: incl %edi # sched: [1:0.50] -; BTVER2-NEXT: incl (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_inc32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: incl %edi # sched: [1:0.25] -; ZNVER1-NEXT: incl (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "incl $0 \0A\09 incl $1", "r,*m"(i32 %a0, i32* %a1) nounwind - ret void -} -define void @test_inc64(i64 %a0, i64* %a1) optsize { -; GENERIC-LABEL: test_inc64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: incq %rdi # sched: [1:0.33] -; GENERIC-NEXT: incq (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_inc64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: incq %rdi # sched: [1:0.50] -; ATOM-NEXT: incq (%rsi) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_inc64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: incq %rdi # sched: [1:0.50] -; SLM-NEXT: incq (%rsi) # sched: [5:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_inc64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: incq %rdi # sched: [1:0.33] -; SANDY-NEXT: incq (%rsi) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_inc64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: incq %rdi # sched: [1:0.25] -; HASWELL-NEXT: incq (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_inc64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: incq %rdi # sched: [1:0.25] -; BROADWELL-NEXT: incq (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_inc64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: incq %rdi # sched: [1:0.25] -; SKYLAKE-NEXT: incq (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_inc64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: incq %rdi # sched: [1:0.25] -; SKX-NEXT: incq (%rsi) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_inc64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: incq %rdi # sched: [1:0.50] -; BDVER2-NEXT: incq (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_inc64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: incq %rdi # sched: [1:0.50] -; BTVER2-NEXT: incq (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_inc64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: incq %rdi # sched: [1:0.25] -; ZNVER1-NEXT: incq (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "incq $0 \0A\09 incq $1", "r,*m"(i64 %a0, i64* %a1) nounwind - ret void -} - -define void @test_ins() optsize { -; GENERIC-LABEL: test_ins: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: insb %dx, %es:(%rdi) # sched: [100:0.33] -; GENERIC-NEXT: insw %dx, %es:(%rdi) # sched: [100:0.33] -; GENERIC-NEXT: insl %dx, %es:(%rdi) # sched: [100:0.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_ins: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: insb %dx, %es:(%rdi) # sched: [59:29.50] -; ATOM-NEXT: insw %dx, %es:(%rdi) # sched: [59:29.50] -; ATOM-NEXT: insl %dx, %es:(%rdi) # sched: [59:29.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_ins: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: insb %dx, %es:(%rdi) # sched: [100:1.00] -; SLM-NEXT: insw %dx, %es:(%rdi) # sched: [100:1.00] -; SLM-NEXT: insl %dx, %es:(%rdi) # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_ins: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: insb %dx, %es:(%rdi) # sched: [100:0.33] -; SANDY-NEXT: insw %dx, %es:(%rdi) # sched: [100:0.33] -; SANDY-NEXT: insl %dx, %es:(%rdi) # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_ins: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: insb %dx, %es:(%rdi) # sched: [21:1.25] -; HASWELL-NEXT: insw %dx, %es:(%rdi) # sched: [21:1.25] -; HASWELL-NEXT: insl %dx, %es:(%rdi) # sched: [21:1.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ins: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: insb %dx, %es:(%rdi) # sched: [20:1.25] -; BROADWELL-NEXT: insw %dx, %es:(%rdi) # sched: [20:1.25] -; BROADWELL-NEXT: insl %dx, %es:(%rdi) # sched: [20:1.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_ins: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: insb %dx, %es:(%rdi) # sched: [20:1.25] -; SKYLAKE-NEXT: insw %dx, %es:(%rdi) # sched: [20:1.25] -; SKYLAKE-NEXT: insl %dx, %es:(%rdi) # sched: [20:1.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_ins: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: insb %dx, %es:(%rdi) # sched: [20:1.25] -; SKX-NEXT: insw %dx, %es:(%rdi) # sched: [20:1.25] -; SKX-NEXT: insl %dx, %es:(%rdi) # sched: [20:1.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_ins: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: insb %dx, %es:(%rdi) # sched: [100:0.50] -; BDVER2-NEXT: insw %dx, %es:(%rdi) # sched: [100:0.50] -; BDVER2-NEXT: insl %dx, %es:(%rdi) # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_ins: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: insb %dx, %es:(%rdi) # sched: [100:0.50] -; BTVER2-NEXT: insw %dx, %es:(%rdi) # sched: [100:0.50] -; BTVER2-NEXT: insl %dx, %es:(%rdi) # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ins: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: insb %dx, %es:(%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: insw %dx, %es:(%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: insl %dx, %es:(%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "insb \0A\09 insw \0A\09 insl", ""() - ret void -} - -define void @test_int() optsize { -; GENERIC-LABEL: test_int: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: int $7 # sched: [100:0.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_int: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: int $7 # sched: [127:63.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_int: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: int $7 # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_int: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: int $7 # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_int: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: int $7 # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_int: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: int $7 # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_int: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: int $7 # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_int: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: int $7 # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_int: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: int $7 # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_int: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: int $7 # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_int: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: int $7 # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "int $0", "i"(i8 7) - ret void -} - -define void @test_invlpg_invlpga(i8 *%a0) optsize { -; GENERIC-LABEL: test_invlpg_invlpga: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: invlpg (%rdi) # sched: [100:0.33] -; GENERIC-NEXT: invlpga %rax, %ecx # sched: [100:0.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_invlpg_invlpga: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: invlpg (%rdi) # sched: [71:35.50] -; ATOM-NEXT: invlpga %rax, %ecx # sched: [71:35.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_invlpg_invlpga: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: invlpg (%rdi) # sched: [100:1.00] -; SLM-NEXT: invlpga %rax, %ecx # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_invlpg_invlpga: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: invlpg (%rdi) # sched: [100:0.33] -; SANDY-NEXT: invlpga %rax, %ecx # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_invlpg_invlpga: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: invlpg (%rdi) # sched: [100:0.25] -; HASWELL-NEXT: invlpga %rax, %ecx # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_invlpg_invlpga: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: invlpg (%rdi) # sched: [100:0.25] -; BROADWELL-NEXT: invlpga %rax, %ecx # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_invlpg_invlpga: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: invlpg (%rdi) # sched: [100:0.25] -; SKYLAKE-NEXT: invlpga %rax, %ecx # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_invlpg_invlpga: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: invlpg (%rdi) # sched: [100:0.25] -; SKX-NEXT: invlpga %rax, %ecx # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_invlpg_invlpga: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: invlpg (%rdi) # sched: [100:0.50] -; BDVER2-NEXT: invlpga %rax, %ecx # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_invlpg_invlpga: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: invlpg (%rdi) # sched: [100:0.50] -; BTVER2-NEXT: invlpga %rax, %ecx # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_invlpg_invlpga: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: invlpg (%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: invlpga %rax, %ecx # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm sideeffect "invlpg $0 \0A\09 invlpga %rax, %ecx", "*m"(i8 *%a0) nounwind - ret void -} - -define void @test_jcc() optsize { -; GENERIC-LABEL: test_jcc: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: JCCTGT: -; GENERIC-NEXT: jo JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jno JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jb JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jb JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jb JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jae JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jae JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jae JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: je JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: je JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jne JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jne JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jbe JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jbe JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: ja JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: ja JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: js JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jns JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jp JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jp JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jnp JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jnp JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jl JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jl JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jge JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jge JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jle JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jle JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jg JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: jg JCCTGT # sched: [1:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_jcc: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: JCCTGT: -; ATOM-NEXT: jo JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jno JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jb JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jb JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jb JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jae JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jae JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jae JCCTGT # sched: [1:1.00] -; ATOM-NEXT: je JCCTGT # sched: [1:1.00] -; ATOM-NEXT: je JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jne JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jne JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jbe JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jbe JCCTGT # sched: [1:1.00] -; ATOM-NEXT: ja JCCTGT # sched: [1:1.00] -; ATOM-NEXT: ja JCCTGT # sched: [1:1.00] -; ATOM-NEXT: js JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jns JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jp JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jp JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jnp JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jnp JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jl JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jl JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jge JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jge JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jle JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jle JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jg JCCTGT # sched: [1:1.00] -; ATOM-NEXT: jg JCCTGT # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_jcc: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: JCCTGT: -; SLM-NEXT: jo JCCTGT # sched: [1:1.00] -; SLM-NEXT: jno JCCTGT # sched: [1:1.00] -; SLM-NEXT: jb JCCTGT # sched: [1:1.00] -; SLM-NEXT: jb JCCTGT # sched: [1:1.00] -; SLM-NEXT: jb JCCTGT # sched: [1:1.00] -; SLM-NEXT: jae JCCTGT # sched: [1:1.00] -; SLM-NEXT: jae JCCTGT # sched: [1:1.00] -; SLM-NEXT: jae JCCTGT # sched: [1:1.00] -; SLM-NEXT: je JCCTGT # sched: [1:1.00] -; SLM-NEXT: je JCCTGT # sched: [1:1.00] -; SLM-NEXT: jne JCCTGT # sched: [1:1.00] -; SLM-NEXT: jne JCCTGT # sched: [1:1.00] -; SLM-NEXT: jbe JCCTGT # sched: [1:1.00] -; SLM-NEXT: jbe JCCTGT # sched: [1:1.00] -; SLM-NEXT: ja JCCTGT # sched: [1:1.00] -; SLM-NEXT: ja JCCTGT # sched: [1:1.00] -; SLM-NEXT: js JCCTGT # sched: [1:1.00] -; SLM-NEXT: jns JCCTGT # sched: [1:1.00] -; SLM-NEXT: jp JCCTGT # sched: [1:1.00] -; SLM-NEXT: jp JCCTGT # sched: [1:1.00] -; SLM-NEXT: jnp JCCTGT # sched: [1:1.00] -; SLM-NEXT: jnp JCCTGT # sched: [1:1.00] -; SLM-NEXT: jl JCCTGT # sched: [1:1.00] -; SLM-NEXT: jl JCCTGT # sched: [1:1.00] -; SLM-NEXT: jge JCCTGT # sched: [1:1.00] -; SLM-NEXT: jge JCCTGT # sched: [1:1.00] -; SLM-NEXT: jle JCCTGT # sched: [1:1.00] -; SLM-NEXT: jle JCCTGT # sched: [1:1.00] -; SLM-NEXT: jg JCCTGT # sched: [1:1.00] -; SLM-NEXT: jg JCCTGT # sched: [1:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_jcc: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: JCCTGT: -; SANDY-NEXT: jo JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jno JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jb JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jb JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jb JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jae JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jae JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jae JCCTGT # sched: [1:1.00] -; SANDY-NEXT: je JCCTGT # sched: [1:1.00] -; SANDY-NEXT: je JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jne JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jne JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jbe JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jbe JCCTGT # sched: [1:1.00] -; SANDY-NEXT: ja JCCTGT # sched: [1:1.00] -; SANDY-NEXT: ja JCCTGT # sched: [1:1.00] -; SANDY-NEXT: js JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jns JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jp JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jp JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jnp JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jnp JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jl JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jl JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jge JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jge JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jle JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jle JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jg JCCTGT # sched: [1:1.00] -; SANDY-NEXT: jg JCCTGT # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_jcc: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: JCCTGT: -; HASWELL-NEXT: jo JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jno JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jb JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jb JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jb JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jae JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jae JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jae JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: je JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: je JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jne JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jne JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jbe JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jbe JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: ja JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: ja JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: js JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jns JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jp JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jp JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jnp JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jnp JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jl JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jl JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jge JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jge JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jle JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jle JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jg JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: jg JCCTGT # sched: [1:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_jcc: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: JCCTGT: -; BROADWELL-NEXT: jo JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jno JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jb JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jb JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jb JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jae JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jae JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jae JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: je JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: je JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jne JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jne JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jbe JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jbe JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: ja JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: ja JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: js JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jns JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jp JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jp JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jnp JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jnp JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jl JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jl JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jge JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jge JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jle JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jle JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jg JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: jg JCCTGT # sched: [1:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_jcc: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: JCCTGT: -; SKYLAKE-NEXT: jo JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jno JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jb JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jb JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jb JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jae JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jae JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jae JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: je JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: je JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jne JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jne JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jbe JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jbe JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: ja JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: ja JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: js JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jns JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jp JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jp JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jnp JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jnp JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jl JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jl JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jge JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jge JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jle JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jle JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jg JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: jg JCCTGT # sched: [1:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_jcc: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: JCCTGT: -; SKX-NEXT: jo JCCTGT # sched: [1:0.50] -; SKX-NEXT: jno JCCTGT # sched: [1:0.50] -; SKX-NEXT: jb JCCTGT # sched: [1:0.50] -; SKX-NEXT: jb JCCTGT # sched: [1:0.50] -; SKX-NEXT: jb JCCTGT # sched: [1:0.50] -; SKX-NEXT: jae JCCTGT # sched: [1:0.50] -; SKX-NEXT: jae JCCTGT # sched: [1:0.50] -; SKX-NEXT: jae JCCTGT # sched: [1:0.50] -; SKX-NEXT: je JCCTGT # sched: [1:0.50] -; SKX-NEXT: je JCCTGT # sched: [1:0.50] -; SKX-NEXT: jne JCCTGT # sched: [1:0.50] -; SKX-NEXT: jne JCCTGT # sched: [1:0.50] -; SKX-NEXT: jbe JCCTGT # sched: [1:0.50] -; SKX-NEXT: jbe JCCTGT # sched: [1:0.50] -; SKX-NEXT: ja JCCTGT # sched: [1:0.50] -; SKX-NEXT: ja JCCTGT # sched: [1:0.50] -; SKX-NEXT: js JCCTGT # sched: [1:0.50] -; SKX-NEXT: jns JCCTGT # sched: [1:0.50] -; SKX-NEXT: jp JCCTGT # sched: [1:0.50] -; SKX-NEXT: jp JCCTGT # sched: [1:0.50] -; SKX-NEXT: jnp JCCTGT # sched: [1:0.50] -; SKX-NEXT: jnp JCCTGT # sched: [1:0.50] -; SKX-NEXT: jl JCCTGT # sched: [1:0.50] -; SKX-NEXT: jl JCCTGT # sched: [1:0.50] -; SKX-NEXT: jge JCCTGT # sched: [1:0.50] -; SKX-NEXT: jge JCCTGT # sched: [1:0.50] -; SKX-NEXT: jle JCCTGT # sched: [1:0.50] -; SKX-NEXT: jle JCCTGT # sched: [1:0.50] -; SKX-NEXT: jg JCCTGT # sched: [1:0.50] -; SKX-NEXT: jg JCCTGT # sched: [1:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_jcc: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: JCCTGT: -; BDVER2-NEXT: jo JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jno JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jb JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jb JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jb JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jae JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jae JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jae JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: je JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: je JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jne JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jne JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jbe JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jbe JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: ja JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: ja JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: js JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jns JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jp JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jp JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jnp JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jnp JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jl JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jl JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jge JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jge JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jle JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jle JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jg JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: jg JCCTGT # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_jcc: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: JCCTGT: -; BTVER2-NEXT: jo JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jno JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jb JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jb JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jb JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jae JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jae JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jae JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: je JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: je JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jne JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jne JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jbe JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jbe JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: ja JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: ja JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: js JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jns JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jp JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jp JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jnp JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jnp JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jl JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jl JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jge JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jge JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jle JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jle JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jg JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: jg JCCTGT # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_jcc: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: JCCTGT: -; ZNVER1-NEXT: jo JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jno JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jb JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jb JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jb JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jae JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jae JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jae JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: je JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: je JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jne JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jne JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jbe JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jbe JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: ja JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: ja JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: js JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jns JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jp JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jp JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jnp JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jnp JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jl JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jl JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jge JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jge JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jle JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jle JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jg JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: jg JCCTGT # sched: [1:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "JCCTGT: \0A\09 jo JCCTGT \0A\09 jno JCCTGT \0A\09 jb JCCTGT \0A\09 jc JCCTGT \0A\09 jnae JCCTGT \0A\09 jnb JCCTGT \0A\09 jnc JCCTGT \0A\09 jae JCCTGT \0A\09 jz JCCTGT \0A\09 je JCCTGT \0A\09 jnz JCCTGT \0A\09 jne JCCTGT \0A\09 jbe JCCTGT \0A\09 jna JCCTGT \0A\09 jnbe JCCTGT \0A\09 ja JCCTGT \0A\09 js JCCTGT \0A\09 jns JCCTGT \0A\09 jp JCCTGT \0A\09 jpe JCCTGT \0A\09 jnp JCCTGT \0A\09 jpo JCCTGT \0A\09 jl JCCTGT \0A\09 jnge JCCTGT \0A\09 jnl JCCTGT \0A\09 jge JCCTGT \0A\09 jle JCCTGT \0A\09 jng JCCTGT \0A\09 jnle JCCTGT \0A\09 jg JCCTGT", ""() - ret void -} - -define void @test_jecxz_jrcxz() optsize { -; GENERIC-LABEL: test_jecxz_jrcxz: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: JXTGT: -; GENERIC-NEXT: jecxz JXTGT # sched: [2:1.00] -; GENERIC-NEXT: jrcxz JXTGT # sched: [2:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_jecxz_jrcxz: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: JXTGT: -; ATOM-NEXT: jecxz JXTGT # sched: [4:2.00] -; ATOM-NEXT: jrcxz JXTGT # sched: [4:2.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_jecxz_jrcxz: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: JXTGT: -; SLM-NEXT: jecxz JXTGT # sched: [1:1.00] -; SLM-NEXT: jrcxz JXTGT # sched: [1:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_jecxz_jrcxz: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: JXTGT: -; SANDY-NEXT: jecxz JXTGT # sched: [2:1.00] -; SANDY-NEXT: jrcxz JXTGT # sched: [2:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_jecxz_jrcxz: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: JXTGT: -; HASWELL-NEXT: jecxz JXTGT # sched: [2:0.50] -; HASWELL-NEXT: jrcxz JXTGT # sched: [2:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_jecxz_jrcxz: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: JXTGT: -; BROADWELL-NEXT: jecxz JXTGT # sched: [2:0.50] -; BROADWELL-NEXT: jrcxz JXTGT # sched: [2:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_jecxz_jrcxz: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: JXTGT: -; SKYLAKE-NEXT: jecxz JXTGT # sched: [2:0.50] -; SKYLAKE-NEXT: jrcxz JXTGT # sched: [2:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_jecxz_jrcxz: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: JXTGT: -; SKX-NEXT: jecxz JXTGT # sched: [2:0.50] -; SKX-NEXT: jrcxz JXTGT # sched: [2:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_jecxz_jrcxz: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: JXTGT: -; BDVER2-NEXT: jecxz JXTGT # sched: [1:1.00] -; BDVER2-NEXT: jrcxz JXTGT # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_jecxz_jrcxz: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: JXTGT: -; BTVER2-NEXT: jecxz JXTGT # sched: [1:0.50] -; BTVER2-NEXT: jrcxz JXTGT # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_jecxz_jrcxz: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: JXTGT: -; ZNVER1-NEXT: jecxz JXTGT # sched: [1:0.50] -; ZNVER1-NEXT: jrcxz JXTGT # sched: [1:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "JXTGT: \0A\09 jecxz JXTGT \0A\09 jrcxz JXTGT", ""() - ret void -} - -; TODO - test_jmp - -define void @test_lahf_sahf() optsize { -; GENERIC-LABEL: test_lahf_sahf: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: lahf # sched: [1:0.50] -; GENERIC-NEXT: sahf # sched: [1:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lahf_sahf: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: lahf # sched: [2:1.00] -; ATOM-NEXT: sahf # sched: [2:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lahf_sahf: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: lahf # sched: [1:0.50] -; SLM-NEXT: sahf # sched: [1:0.50] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lahf_sahf: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: lahf # sched: [1:0.50] -; SANDY-NEXT: sahf # sched: [1:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lahf_sahf: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: lahf # sched: [1:0.50] -; HASWELL-NEXT: sahf # sched: [1:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lahf_sahf: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: lahf # sched: [1:0.50] -; BROADWELL-NEXT: sahf # sched: [1:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lahf_sahf: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: lahf # sched: [1:0.50] -; SKYLAKE-NEXT: sahf # sched: [1:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_lahf_sahf: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: lahf # sched: [1:0.50] -; SKX-NEXT: sahf # sched: [1:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lahf_sahf: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: lahf # sched: [2:0.50] -; BDVER2-NEXT: sahf # sched: [2:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lahf_sahf: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: lahf # sched: [1:0.50] -; BTVER2-NEXT: sahf # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lahf_sahf: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: lahf # sched: [100:0.25] -; ZNVER1-NEXT: sahf # sched: [2:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "lahf \0A\09 sahf", ""() nounwind - ret void -} - -; TODO - test_lds -; TODO - test_les -; TODO - test_lfs -; TODO - test_lgs -; TODO - test_lss - -; TODO - test_lea - -define void @test_leave() optsize { -; GENERIC-LABEL: test_leave: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: leave # sched: [7:0.67] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_leave: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: leave # sched: [2:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_leave: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: leave # sched: [1:0.50] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_leave: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: leave # sched: [7:0.67] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_leave: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: leave # sched: [7:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_leave: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: leave # sched: [7:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_leave: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: leave # sched: [7:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_leave: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: leave # sched: [7:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_leave: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: leave # sched: [1:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_leave: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: leave # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_leave: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: leave # sched: [8:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "leave", ""() nounwind - ret void -} - -define void @test_lods() optsize { -; GENERIC-LABEL: test_lods: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: lodsb (%rsi), %al # sched: [7:0.67] -; GENERIC-NEXT: lodsw (%rsi), %ax # sched: [7:0.67] -; GENERIC-NEXT: lodsl (%rsi), %eax # sched: [6:0.50] -; GENERIC-NEXT: lodsq (%rsi), %rax # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lods: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: lodsb (%rsi), %al # sched: [2:1.00] -; ATOM-NEXT: lodsw (%rsi), %ax # sched: [2:1.00] -; ATOM-NEXT: lodsl (%rsi), %eax # sched: [2:1.00] -; ATOM-NEXT: lodsq (%rsi), %rax # sched: [2:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lods: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: lodsb (%rsi), %al # sched: [100:1.00] -; SLM-NEXT: lodsw (%rsi), %ax # sched: [100:1.00] -; SLM-NEXT: lodsl (%rsi), %eax # sched: [100:1.00] -; SLM-NEXT: lodsq (%rsi), %rax # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lods: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: lodsb (%rsi), %al # sched: [7:0.67] -; SANDY-NEXT: lodsw (%rsi), %ax # sched: [7:0.67] -; SANDY-NEXT: lodsl (%rsi), %eax # sched: [6:0.50] -; SANDY-NEXT: lodsq (%rsi), %rax # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lods: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: lodsb (%rsi), %al # sched: [1:0.50] -; HASWELL-NEXT: lodsw (%rsi), %ax # sched: [1:0.50] -; HASWELL-NEXT: lodsl (%rsi), %eax # sched: [1:0.50] -; HASWELL-NEXT: lodsq (%rsi), %rax # sched: [1:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lods: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: lodsb (%rsi), %al # sched: [100:0.25] -; BROADWELL-NEXT: lodsw (%rsi), %ax # sched: [100:0.25] -; BROADWELL-NEXT: lodsl (%rsi), %eax # sched: [100:0.25] -; BROADWELL-NEXT: lodsq (%rsi), %rax # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lods: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: lodsb (%rsi), %al # sched: [100:0.25] -; SKYLAKE-NEXT: lodsw (%rsi), %ax # sched: [100:0.25] -; SKYLAKE-NEXT: lodsl (%rsi), %eax # sched: [100:0.25] -; SKYLAKE-NEXT: lodsq (%rsi), %rax # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_lods: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: lodsb (%rsi), %al # sched: [100:0.25] -; SKX-NEXT: lodsw (%rsi), %ax # sched: [100:0.25] -; SKX-NEXT: lodsl (%rsi), %eax # sched: [100:0.25] -; SKX-NEXT: lodsq (%rsi), %rax # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_lods: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: lodsb (%rsi), %al # sched: [100:0.50] -; BDVER2-NEXT: lodsw (%rsi), %ax # sched: [100:0.50] -; BDVER2-NEXT: lodsl (%rsi), %eax # sched: [100:0.50] -; BDVER2-NEXT: lodsq (%rsi), %rax # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_lods: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: lodsb (%rsi), %al # sched: [100:0.50] -; BTVER2-NEXT: lodsw (%rsi), %ax # sched: [100:0.50] -; BTVER2-NEXT: lodsl (%rsi), %eax # sched: [100:0.50] -; BTVER2-NEXT: lodsq (%rsi), %rax # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lods: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: lodsb (%rsi), %al # sched: [100:0.25] -; ZNVER1-NEXT: lodsw (%rsi), %ax # sched: [100:0.25] -; ZNVER1-NEXT: lodsl (%rsi), %eax # sched: [100:0.25] -; ZNVER1-NEXT: lodsq (%rsi), %rax # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "lodsb \0A\09 lodsw \0A\09 lodsl \0A\09 lodsq", ""() - ret void -} - -define void @test_loop() optsize { -; GENERIC-LABEL: test_loop: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: LTGT: -; GENERIC-NEXT: loop LTGT # sched: [1:1.00] -; GENERIC-NEXT: loope LTGT # sched: [1:1.00] -; GENERIC-NEXT: loopne LTGT # sched: [1:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_loop: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: LTGT: -; ATOM-NEXT: loop LTGT # sched: [18:9.00] -; ATOM-NEXT: loope LTGT # sched: [8:4.00] -; ATOM-NEXT: loopne LTGT # sched: [17:8.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_loop: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: LTGT: -; SLM-NEXT: loop LTGT # sched: [1:1.00] -; SLM-NEXT: loope LTGT # sched: [1:1.00] -; SLM-NEXT: loopne LTGT # sched: [1:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_loop: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: LTGT: -; SANDY-NEXT: loop LTGT # sched: [1:1.00] -; SANDY-NEXT: loope LTGT # sched: [1:1.00] -; SANDY-NEXT: loopne LTGT # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_loop: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: LTGT: -; HASWELL-NEXT: loop LTGT # sched: [7:2.00] -; HASWELL-NEXT: loope LTGT # sched: [11:2.75] -; HASWELL-NEXT: loopne LTGT # sched: [11:2.75] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_loop: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: LTGT: -; BROADWELL-NEXT: loop LTGT # sched: [7:2.00] -; BROADWELL-NEXT: loope LTGT # sched: [11:2.75] -; BROADWELL-NEXT: loopne LTGT # sched: [11:2.75] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_loop: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: LTGT: -; SKYLAKE-NEXT: loop LTGT # sched: [7:2.00] -; SKYLAKE-NEXT: loope LTGT # sched: [11:2.75] -; SKYLAKE-NEXT: loopne LTGT # sched: [11:2.75] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_loop: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: LTGT: -; SKX-NEXT: loop LTGT # sched: [7:2.00] -; SKX-NEXT: loope LTGT # sched: [11:2.75] -; SKX-NEXT: loopne LTGT # sched: [11:2.75] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_loop: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: LTGT: -; BDVER2-NEXT: loop LTGT # sched: [1:1.00] -; BDVER2-NEXT: loope LTGT # sched: [1:1.00] -; BDVER2-NEXT: loopne LTGT # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_loop: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: LTGT: -; BTVER2-NEXT: loop LTGT # sched: [1:0.50] -; BTVER2-NEXT: loope LTGT # sched: [1:0.50] -; BTVER2-NEXT: loopne LTGT # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_loop: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: LTGT: -; ZNVER1-NEXT: loop LTGT # sched: [1:0.50] -; ZNVER1-NEXT: loope LTGT # sched: [1:0.50] -; ZNVER1-NEXT: loopne LTGT # sched: [1:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "LTGT: \0A\09 loop LTGT \0A\09 loope LTGT \0A\09 loopne LTGT", ""() - ret void -} - -; TODO - test_mov - -define void @test_movnti(i32 %a0, i32 *%a1, i64 %a2, i64 *%a3) optsize { -; GENERIC-LABEL: test_movnti: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: movntil %edi, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movnti: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: movntil %edi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movnti: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: movntil %edi, (%rsi) # sched: [1:1.00] -; SLM-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_movnti: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: movntil %edi, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movnti: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: movntil %edi, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movnti: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: movntil %edi, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movnti: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: movntil %edi, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movnti: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: movntil %edi, (%rsi) # sched: [1:1.00] -; SKX-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movnti: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: movntil %edi, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movnti: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: movntil %edi, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movnti: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: movntil %edi, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: movntiq %rdx, (%rcx) # sched: [1:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "movnti $0, $1 \0A\09 movnti $2, $3", "r,*m,r,*m"(i32 %a0, i32 *%a1, i64 %a2, i64 *%a3) - ret void -} - -define void @test_movs() optsize { -; GENERIC-LABEL: test_movs: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: movsb (%rsi), %es:(%rdi) # sched: [8:1.00] -; GENERIC-NEXT: movsw (%rsi), %es:(%rdi) # sched: [8:1.00] -; GENERIC-NEXT: movsl (%rsi), %es:(%rdi) # sched: [8:1.00] -; GENERIC-NEXT: movsq (%rsi), %es:(%rdi) # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movs: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: movsb (%rsi), %es:(%rdi) # sched: [3:1.50] -; ATOM-NEXT: movsw (%rsi), %es:(%rdi) # sched: [3:1.50] -; ATOM-NEXT: movsl (%rsi), %es:(%rdi) # sched: [3:1.50] -; ATOM-NEXT: movsq (%rsi), %es:(%rdi) # sched: [3:1.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movs: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: movsb (%rsi), %es:(%rdi) # sched: [100:1.00] -; SLM-NEXT: movsw (%rsi), %es:(%rdi) # sched: [100:1.00] -; SLM-NEXT: movsl (%rsi), %es:(%rdi) # sched: [100:1.00] -; SLM-NEXT: movsq (%rsi), %es:(%rdi) # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_movs: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: movsb (%rsi), %es:(%rdi) # sched: [8:1.00] -; SANDY-NEXT: movsw (%rsi), %es:(%rdi) # sched: [8:1.00] -; SANDY-NEXT: movsl (%rsi), %es:(%rdi) # sched: [8:1.00] -; SANDY-NEXT: movsq (%rsi), %es:(%rdi) # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movs: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: movsb (%rsi), %es:(%rdi) # sched: [4:1.00] -; HASWELL-NEXT: movsw (%rsi), %es:(%rdi) # sched: [4:1.00] -; HASWELL-NEXT: movsl (%rsi), %es:(%rdi) # sched: [4:1.00] -; HASWELL-NEXT: movsq (%rsi), %es:(%rdi) # sched: [4:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movs: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: movsb (%rsi), %es:(%rdi) # sched: [100:0.25] -; BROADWELL-NEXT: movsw (%rsi), %es:(%rdi) # sched: [100:0.25] -; BROADWELL-NEXT: movsl (%rsi), %es:(%rdi) # sched: [100:0.25] -; BROADWELL-NEXT: movsq (%rsi), %es:(%rdi) # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movs: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: movsb (%rsi), %es:(%rdi) # sched: [100:0.25] -; SKYLAKE-NEXT: movsw (%rsi), %es:(%rdi) # sched: [100:0.25] -; SKYLAKE-NEXT: movsl (%rsi), %es:(%rdi) # sched: [100:0.25] -; SKYLAKE-NEXT: movsq (%rsi), %es:(%rdi) # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movs: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: movsb (%rsi), %es:(%rdi) # sched: [100:0.25] -; SKX-NEXT: movsw (%rsi), %es:(%rdi) # sched: [100:0.25] -; SKX-NEXT: movsl (%rsi), %es:(%rdi) # sched: [100:0.25] -; SKX-NEXT: movsq (%rsi), %es:(%rdi) # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movs: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: movsb (%rsi), %es:(%rdi) # sched: [100:0.50] -; BDVER2-NEXT: movsw (%rsi), %es:(%rdi) # sched: [100:0.50] -; BDVER2-NEXT: movsl (%rsi), %es:(%rdi) # sched: [100:0.50] -; BDVER2-NEXT: movsq (%rsi), %es:(%rdi) # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movs: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: movsb (%rsi), %es:(%rdi) # sched: [100:0.50] -; BTVER2-NEXT: movsw (%rsi), %es:(%rdi) # sched: [100:0.50] -; BTVER2-NEXT: movsl (%rsi), %es:(%rdi) # sched: [100:0.50] -; BTVER2-NEXT: movsq (%rsi), %es:(%rdi) # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movs: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: movsb (%rsi), %es:(%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: movsw (%rsi), %es:(%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: movsl (%rsi), %es:(%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: movsq (%rsi), %es:(%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "movsb \0A\09 movsw \0A\09 movsl \0A\09 movsq", ""() - ret void -} - -; TODO - test_movsx -; TODO - test_movzx - -define i64 @test_movslq(i32 %a0, i32 *%a1) optsize { -; GENERIC-LABEL: test_movslq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: movslq %edi, %rax # sched: [1:0.33] -; GENERIC-NEXT: movslq (%rsi), %rcx # sched: [5:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movslq: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: movslq %edi, %rax # sched: [1:1.00] -; ATOM-NEXT: movslq (%rsi), %rcx # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: orq %rcx, %rax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movslq: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: movslq %edi, %rax # sched: [1:0.50] -; SLM-NEXT: movslq (%rsi), %rcx # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: orq %rcx, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_movslq: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: movslq %edi, %rax # sched: [1:0.33] -; SANDY-NEXT: movslq (%rsi), %rcx # sched: [5:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: orq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_movslq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: movslq %edi, %rax # sched: [1:0.25] -; HASWELL-NEXT: movslq (%rsi), %rcx # sched: [5:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movslq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: movslq %edi, %rax # sched: [1:0.25] -; BROADWELL-NEXT: movslq (%rsi), %rcx # sched: [5:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movslq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: movslq %edi, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: movslq (%rsi), %rcx # sched: [5:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movslq: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: movslq %edi, %rax # sched: [1:0.25] -; SKX-NEXT: movslq (%rsi), %rcx # sched: [5:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: orq %rcx, %rax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_movslq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: movslq %edi, %rax # sched: [1:0.50] -; BDVER2-NEXT: movslq (%rsi), %rcx # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movslq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: movslq %edi, %rax # sched: [1:0.50] -; BTVER2-NEXT: movslq (%rsi), %rcx # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movslq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: movslq %edi, %rax # sched: [1:0.25] -; ZNVER1-NEXT: movslq (%rsi), %rcx # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call { i64, i64 } asm sideeffect "movslq $2, $0 \0A\09 movslq $3, $1", "=r,=r,r,*m"(i32 %a0, i32 *%a1) - %2 = extractvalue { i64, i64 } %1, 0 - %3 = extractvalue { i64, i64 } %1, 1 - %4 = or i64 %2, %3 - ret i64 %4 -} - -define void @test_mul(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) optsize { -; GENERIC-LABEL: test_mul: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; GENERIC-NEXT: #APP -; GENERIC-NEXT: mulb %dil # sched: [3:1.00] -; GENERIC-NEXT: mulb (%r8) # sched: [8:1.00] -; GENERIC-NEXT: mulw %si # sched: [4:1.33] -; GENERIC-NEXT: mulw (%r9) # sched: [9:1.33] -; GENERIC-NEXT: mull %edx # sched: [4:1.00] -; GENERIC-NEXT: mull (%rax) # sched: [9:1.00] -; GENERIC-NEXT: mulq %rcx # sched: [4:1.00] -; GENERIC-NEXT: mulq (%r10) # sched: [9:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_mul: -; ATOM: # %bb.0: -; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [1:1.00] -; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: mulb %dil # sched: [7:3.50] -; ATOM-NEXT: mulb (%r8) # sched: [7:3.50] -; ATOM-NEXT: mulw %si # sched: [7:3.50] -; ATOM-NEXT: mulw (%r9) # sched: [8:4.00] -; ATOM-NEXT: mull %edx # sched: [6:3.00] -; ATOM-NEXT: mull (%rax) # sched: [7:3.50] -; ATOM-NEXT: mulq %rcx # sched: [12:6.00] -; ATOM-NEXT: mulq (%r10) # sched: [12:6.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_mul: -; SLM: # %bb.0: -; SLM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00] -; SLM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: mulb %dil # sched: [3:1.00] -; SLM-NEXT: mulb (%r8) # sched: [6:1.00] -; SLM-NEXT: mulw %si # sched: [3:1.00] -; SLM-NEXT: mulw (%r9) # sched: [6:1.00] -; SLM-NEXT: mull %edx # sched: [3:1.00] -; SLM-NEXT: mull (%rax) # sched: [6:1.00] -; SLM-NEXT: mulq %rcx # sched: [3:1.00] -; SLM-NEXT: mulq (%r10) # sched: [6:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_mul: -; SANDY: # %bb.0: -; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: mulb %dil # sched: [3:1.00] -; SANDY-NEXT: mulb (%r8) # sched: [8:1.00] -; SANDY-NEXT: mulw %si # sched: [4:1.33] -; SANDY-NEXT: mulw (%r9) # sched: [9:1.33] -; SANDY-NEXT: mull %edx # sched: [4:1.00] -; SANDY-NEXT: mull (%rax) # sched: [9:1.00] -; SANDY-NEXT: mulq %rcx # sched: [4:1.00] -; SANDY-NEXT: mulq (%r10) # sched: [9:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_mul: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: mulb %dil # sched: [3:1.00] -; HASWELL-NEXT: mulb (%r8) # sched: [8:1.00] -; HASWELL-NEXT: mulw %si # sched: [4:1.00] -; HASWELL-NEXT: mulw (%r9) # sched: [9:1.00] -; HASWELL-NEXT: mull %edx # sched: [4:1.00] -; HASWELL-NEXT: mull (%rax) # sched: [9:1.00] -; HASWELL-NEXT: mulq %rcx # sched: [4:1.00] -; HASWELL-NEXT: mulq (%r10) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mul: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: mulb %dil # sched: [3:1.00] -; BROADWELL-NEXT: mulb (%r8) # sched: [8:1.00] -; BROADWELL-NEXT: mulw %si # sched: [4:1.00] -; BROADWELL-NEXT: mulw (%r9) # sched: [9:1.00] -; BROADWELL-NEXT: mull %edx # sched: [4:1.00] -; BROADWELL-NEXT: mull (%rax) # sched: [9:1.00] -; BROADWELL-NEXT: mulq %rcx # sched: [4:1.00] -; BROADWELL-NEXT: mulq (%r10) # sched: [9:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mul: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: mulb %dil # sched: [3:1.00] -; SKYLAKE-NEXT: mulb (%r8) # sched: [8:1.00] -; SKYLAKE-NEXT: mulw %si # sched: [4:1.00] -; SKYLAKE-NEXT: mulw (%r9) # sched: [9:1.00] -; SKYLAKE-NEXT: mull %edx # sched: [4:1.00] -; SKYLAKE-NEXT: mull (%rax) # sched: [9:1.00] -; SKYLAKE-NEXT: mulq %rcx # sched: [4:1.00] -; SKYLAKE-NEXT: mulq (%r10) # sched: [9:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_mul: -; SKX: # %bb.0: -; SKX-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; SKX-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: mulb %dil # sched: [3:1.00] -; SKX-NEXT: mulb (%r8) # sched: [8:1.00] -; SKX-NEXT: mulw %si # sched: [4:1.00] -; SKX-NEXT: mulw (%r9) # sched: [9:1.00] -; SKX-NEXT: mull %edx # sched: [4:1.00] -; SKX-NEXT: mull (%rax) # sched: [9:1.00] -; SKX-NEXT: mulq %rcx # sched: [4:1.00] -; SKX-NEXT: mulq (%r10) # sched: [9:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_mul: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: mulb %dil # sched: [4:1.00] -; BDVER2-NEXT: mulb (%r8) # sched: [8:1.00] -; BDVER2-NEXT: mulw %si # sched: [4:1.00] -; BDVER2-NEXT: mulw (%r9) # sched: [8:1.00] -; BDVER2-NEXT: mull %edx # sched: [4:1.00] -; BDVER2-NEXT: mull (%rax) # sched: [8:1.00] -; BDVER2-NEXT: mulq %rcx # sched: [6:4.00] -; BDVER2-NEXT: mulq (%r10) # sched: [10:4.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_mul: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00] -; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: mulb %dil # sched: [3:1.00] -; BTVER2-NEXT: mulb (%r8) # sched: [6:1.00] -; BTVER2-NEXT: mulw %si # sched: [3:1.00] -; BTVER2-NEXT: mulw (%r9) # sched: [6:1.00] -; BTVER2-NEXT: mull %edx # sched: [3:1.00] -; BTVER2-NEXT: mull (%rax) # sched: [6:1.00] -; BTVER2-NEXT: mulq %rcx # sched: [6:4.00] -; BTVER2-NEXT: mulq (%r10) # sched: [9:4.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_mul: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [8:0.50] -; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: mulb %dil # sched: [4:1.00] -; ZNVER1-NEXT: mulb (%r8) # sched: [8:1.00] -; ZNVER1-NEXT: mulw %si # sched: [3:1.00] -; ZNVER1-NEXT: mulw (%r9) # sched: [8:1.00] -; ZNVER1-NEXT: mull %edx # sched: [3:1.00] -; ZNVER1-NEXT: mull (%rax) # sched: [8:1.00] -; ZNVER1-NEXT: mulq %rcx # sched: [4:1.00] -; ZNVER1-NEXT: mulq (%r10) # sched: [9:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "mulb $0 \0A\09 mulb $4 \0A\09 mulw $1 \0A\09 mulw $5 \0A\09 mull $2 \0A\09 mull $6 \0A\09 mulq $3 \0A\09 mulq $7", "r,r,r,r,*m,*m,*m,*m"(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) nounwind - ret void -} - -define void @test_neg(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) optsize { -; GENERIC-LABEL: test_neg: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; GENERIC-NEXT: #APP -; GENERIC-NEXT: negb %dil # sched: [1:0.33] -; GENERIC-NEXT: negb (%r8) # sched: [7:1.00] -; GENERIC-NEXT: negw %si # sched: [1:0.33] -; GENERIC-NEXT: negw (%r9) # sched: [7:1.00] -; GENERIC-NEXT: negl %edx # sched: [1:0.33] -; GENERIC-NEXT: negl (%rax) # sched: [7:1.00] -; GENERIC-NEXT: negq %rcx # sched: [1:0.33] -; GENERIC-NEXT: negq (%r10) # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_neg: -; ATOM: # %bb.0: -; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [1:1.00] -; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: negb %dil # sched: [1:0.50] -; ATOM-NEXT: negb (%r8) # sched: [1:1.00] -; ATOM-NEXT: negw %si # sched: [1:0.50] -; ATOM-NEXT: negw (%r9) # sched: [1:1.00] -; ATOM-NEXT: negl %edx # sched: [1:0.50] -; ATOM-NEXT: negl (%rax) # sched: [1:1.00] -; ATOM-NEXT: negq %rcx # sched: [1:0.50] -; ATOM-NEXT: negq (%r10) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_neg: -; SLM: # %bb.0: -; SLM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00] -; SLM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: negb %dil # sched: [1:0.50] -; SLM-NEXT: negb (%r8) # sched: [5:2.00] -; SLM-NEXT: negw %si # sched: [1:0.50] -; SLM-NEXT: negw (%r9) # sched: [5:2.00] -; SLM-NEXT: negl %edx # sched: [1:0.50] -; SLM-NEXT: negl (%rax) # sched: [5:2.00] -; SLM-NEXT: negq %rcx # sched: [1:0.50] -; SLM-NEXT: negq (%r10) # sched: [5:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_neg: -; SANDY: # %bb.0: -; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: negb %dil # sched: [1:0.33] -; SANDY-NEXT: negb (%r8) # sched: [7:1.00] -; SANDY-NEXT: negw %si # sched: [1:0.33] -; SANDY-NEXT: negw (%r9) # sched: [7:1.00] -; SANDY-NEXT: negl %edx # sched: [1:0.33] -; SANDY-NEXT: negl (%rax) # sched: [7:1.00] -; SANDY-NEXT: negq %rcx # sched: [1:0.33] -; SANDY-NEXT: negq (%r10) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_neg: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: negb %dil # sched: [1:0.25] -; HASWELL-NEXT: negb (%r8) # sched: [7:1.00] -; HASWELL-NEXT: negw %si # sched: [1:0.25] -; HASWELL-NEXT: negw (%r9) # sched: [7:1.00] -; HASWELL-NEXT: negl %edx # sched: [1:0.25] -; HASWELL-NEXT: negl (%rax) # sched: [7:1.00] -; HASWELL-NEXT: negq %rcx # sched: [1:0.25] -; HASWELL-NEXT: negq (%r10) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_neg: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: negb %dil # sched: [1:0.25] -; BROADWELL-NEXT: negb (%r8) # sched: [7:1.00] -; BROADWELL-NEXT: negw %si # sched: [1:0.25] -; BROADWELL-NEXT: negw (%r9) # sched: [7:1.00] -; BROADWELL-NEXT: negl %edx # sched: [1:0.25] -; BROADWELL-NEXT: negl (%rax) # sched: [7:1.00] -; BROADWELL-NEXT: negq %rcx # sched: [1:0.25] -; BROADWELL-NEXT: negq (%r10) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_neg: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: negb %dil # sched: [1:0.25] -; SKYLAKE-NEXT: negb (%r8) # sched: [7:1.00] -; SKYLAKE-NEXT: negw %si # sched: [1:0.25] -; SKYLAKE-NEXT: negw (%r9) # sched: [7:1.00] -; SKYLAKE-NEXT: negl %edx # sched: [1:0.25] -; SKYLAKE-NEXT: negl (%rax) # sched: [7:1.00] -; SKYLAKE-NEXT: negq %rcx # sched: [1:0.25] -; SKYLAKE-NEXT: negq (%r10) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_neg: -; SKX: # %bb.0: -; SKX-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; SKX-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: negb %dil # sched: [1:0.25] -; SKX-NEXT: negb (%r8) # sched: [7:1.00] -; SKX-NEXT: negw %si # sched: [1:0.25] -; SKX-NEXT: negw (%r9) # sched: [7:1.00] -; SKX-NEXT: negl %edx # sched: [1:0.25] -; SKX-NEXT: negl (%rax) # sched: [7:1.00] -; SKX-NEXT: negq %rcx # sched: [1:0.25] -; SKX-NEXT: negq (%r10) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_neg: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: negb %dil # sched: [1:0.50] -; BDVER2-NEXT: negb (%r8) # sched: [6:1.00] -; BDVER2-NEXT: negw %si # sched: [1:0.50] -; BDVER2-NEXT: negw (%r9) # sched: [6:1.00] -; BDVER2-NEXT: negl %edx # sched: [1:0.50] -; BDVER2-NEXT: negl (%rax) # sched: [6:1.00] -; BDVER2-NEXT: negq %rcx # sched: [1:0.50] -; BDVER2-NEXT: negq (%r10) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_neg: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00] -; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: negb %dil # sched: [1:0.50] -; BTVER2-NEXT: negb (%r8) # sched: [5:1.00] -; BTVER2-NEXT: negw %si # sched: [1:0.50] -; BTVER2-NEXT: negw (%r9) # sched: [5:1.00] -; BTVER2-NEXT: negl %edx # sched: [1:0.50] -; BTVER2-NEXT: negl (%rax) # sched: [5:1.00] -; BTVER2-NEXT: negq %rcx # sched: [1:0.50] -; BTVER2-NEXT: negq (%r10) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_neg: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [8:0.50] -; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: negb %dil # sched: [1:0.25] -; ZNVER1-NEXT: negb (%r8) # sched: [5:0.50] -; ZNVER1-NEXT: negw %si # sched: [1:0.25] -; ZNVER1-NEXT: negw (%r9) # sched: [5:0.50] -; ZNVER1-NEXT: negl %edx # sched: [1:0.25] -; ZNVER1-NEXT: negl (%rax) # sched: [5:0.50] -; ZNVER1-NEXT: negq %rcx # sched: [1:0.25] -; ZNVER1-NEXT: negq (%r10) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "negb $0 \0A\09 negb $4 \0A\09 negw $1 \0A\09 negw $5 \0A\09 negl $2 \0A\09 negl $6 \0A\09 negq $3 \0A\09 negq $7", "r,r,r,r,*m,*m,*m,*m"(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) nounwind - ret void -} - -define void @test_nop(i16 %a0, i32 %a1, i64 %a2, i16 *%p0, i32 *%p1, i64 *%p2) optsize { -; GENERIC-LABEL: test_nop: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: nop # sched: [1:0.25] -; GENERIC-NEXT: nopw %di # sched: [1:0.25] -; GENERIC-NEXT: nopw (%rcx) # sched: [1:0.25] -; GENERIC-NEXT: nopl %esi # sched: [1:0.25] -; GENERIC-NEXT: nopl (%r8) # sched: [1:0.25] -; GENERIC-NEXT: nopq %rdx # sched: [1:0.25] -; GENERIC-NEXT: nopq (%r9) # sched: [1:0.25] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_nop: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nopw %di # sched: [1:0.50] -; ATOM-NEXT: nopw (%rcx) # sched: [1:0.50] -; ATOM-NEXT: nopl %esi # sched: [1:0.50] -; ATOM-NEXT: nopl (%r8) # sched: [1:0.50] -; ATOM-NEXT: nopq %rdx # sched: [1:0.50] -; ATOM-NEXT: nopq (%r9) # sched: [1:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_nop: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: nop # sched: [1:0.50] -; SLM-NEXT: nopw %di # sched: [1:0.50] -; SLM-NEXT: nopw (%rcx) # sched: [1:0.50] -; SLM-NEXT: nopl %esi # sched: [1:0.50] -; SLM-NEXT: nopl (%r8) # sched: [1:0.50] -; SLM-NEXT: nopq %rdx # sched: [1:0.50] -; SLM-NEXT: nopq (%r9) # sched: [1:0.50] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_nop: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: nop # sched: [1:0.25] -; SANDY-NEXT: nopw %di # sched: [1:0.25] -; SANDY-NEXT: nopw (%rcx) # sched: [1:0.25] -; SANDY-NEXT: nopl %esi # sched: [1:0.25] -; SANDY-NEXT: nopl (%r8) # sched: [1:0.25] -; SANDY-NEXT: nopq %rdx # sched: [1:0.25] -; SANDY-NEXT: nopq (%r9) # sched: [1:0.25] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_nop: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: nop # sched: [1:0.25] -; HASWELL-NEXT: nopw %di # sched: [1:0.25] -; HASWELL-NEXT: nopw (%rcx) # sched: [1:0.25] -; HASWELL-NEXT: nopl %esi # sched: [1:0.25] -; HASWELL-NEXT: nopl (%r8) # sched: [1:0.25] -; HASWELL-NEXT: nopq %rdx # sched: [1:0.25] -; HASWELL-NEXT: nopq (%r9) # sched: [1:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_nop: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: nop # sched: [1:0.25] -; BROADWELL-NEXT: nopw %di # sched: [1:0.25] -; BROADWELL-NEXT: nopw (%rcx) # sched: [1:0.25] -; BROADWELL-NEXT: nopl %esi # sched: [1:0.25] -; BROADWELL-NEXT: nopl (%r8) # sched: [1:0.25] -; BROADWELL-NEXT: nopq %rdx # sched: [1:0.25] -; BROADWELL-NEXT: nopq (%r9) # sched: [1:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_nop: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: nop # sched: [1:0.17] -; SKYLAKE-NEXT: nopw %di # sched: [1:0.17] -; SKYLAKE-NEXT: nopw (%rcx) # sched: [1:0.17] -; SKYLAKE-NEXT: nopl %esi # sched: [1:0.17] -; SKYLAKE-NEXT: nopl (%r8) # sched: [1:0.17] -; SKYLAKE-NEXT: nopq %rdx # sched: [1:0.17] -; SKYLAKE-NEXT: nopq (%r9) # sched: [1:0.17] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_nop: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: nop # sched: [1:0.17] -; SKX-NEXT: nopw %di # sched: [1:0.17] -; SKX-NEXT: nopw (%rcx) # sched: [1:0.17] -; SKX-NEXT: nopl %esi # sched: [1:0.17] -; SKX-NEXT: nopl (%r8) # sched: [1:0.17] -; SKX-NEXT: nopq %rdx # sched: [1:0.17] -; SKX-NEXT: nopq (%r9) # sched: [1:0.17] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_nop: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: nop # sched: [1:0.50] -; BDVER2-NEXT: nopw %di # sched: [1:0.50] -; BDVER2-NEXT: nopw (%rcx) # sched: [1:0.50] -; BDVER2-NEXT: nopl %esi # sched: [1:0.50] -; BDVER2-NEXT: nopl (%r8) # sched: [1:0.50] -; BDVER2-NEXT: nopq %rdx # sched: [1:0.50] -; BDVER2-NEXT: nopq (%r9) # sched: [1:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_nop: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: nop # sched: [1:0.50] -; BTVER2-NEXT: nopw %di # sched: [1:0.50] -; BTVER2-NEXT: nopw (%rcx) # sched: [1:0.50] -; BTVER2-NEXT: nopl %esi # sched: [1:0.50] -; BTVER2-NEXT: nopl (%r8) # sched: [1:0.50] -; BTVER2-NEXT: nopq %rdx # sched: [1:0.50] -; BTVER2-NEXT: nopq (%r9) # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_nop: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: nop # sched: [1:0.25] -; ZNVER1-NEXT: nopw %di # sched: [1:0.25] -; ZNVER1-NEXT: nopw (%rcx) # sched: [1:0.25] -; ZNVER1-NEXT: nopl %esi # sched: [1:0.25] -; ZNVER1-NEXT: nopl (%r8) # sched: [1:0.25] -; ZNVER1-NEXT: nopq %rdx # sched: [1:0.25] -; ZNVER1-NEXT: nopq (%r9) # sched: [1:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "nop \0A\09 nopw $0 \0A\09 nopw $3 \0A\09 nopl $1 \0A\09 nopl $4 \0A\09 nopq $2 \0A\09 nopq $5", "r,r,r,*m,*m,*m"(i16 %a0, i32 %a1, i64 %a2, i16 *%p0, i32 *%p1, i64 *%p2) nounwind - ret void -} - -define void @test_not(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) optsize { -; GENERIC-LABEL: test_not: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; GENERIC-NEXT: #APP -; GENERIC-NEXT: notb %dil # sched: [1:0.33] -; GENERIC-NEXT: notb (%r8) # sched: [7:1.00] -; GENERIC-NEXT: notw %si # sched: [1:0.33] -; GENERIC-NEXT: notw (%r9) # sched: [7:1.00] -; GENERIC-NEXT: notl %edx # sched: [1:0.33] -; GENERIC-NEXT: notl (%rax) # sched: [7:1.00] -; GENERIC-NEXT: notq %rcx # sched: [1:0.33] -; GENERIC-NEXT: notq (%r10) # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_not: -; ATOM: # %bb.0: -; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [1:1.00] -; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: notb %dil # sched: [1:0.50] -; ATOM-NEXT: notb (%r8) # sched: [1:1.00] -; ATOM-NEXT: notw %si # sched: [1:0.50] -; ATOM-NEXT: notw (%r9) # sched: [1:1.00] -; ATOM-NEXT: notl %edx # sched: [1:0.50] -; ATOM-NEXT: notl (%rax) # sched: [1:1.00] -; ATOM-NEXT: notq %rcx # sched: [1:0.50] -; ATOM-NEXT: notq (%r10) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_not: -; SLM: # %bb.0: -; SLM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00] -; SLM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: notb %dil # sched: [1:0.50] -; SLM-NEXT: notb (%r8) # sched: [5:2.00] -; SLM-NEXT: notw %si # sched: [1:0.50] -; SLM-NEXT: notw (%r9) # sched: [5:2.00] -; SLM-NEXT: notl %edx # sched: [1:0.50] -; SLM-NEXT: notl (%rax) # sched: [5:2.00] -; SLM-NEXT: notq %rcx # sched: [1:0.50] -; SLM-NEXT: notq (%r10) # sched: [5:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_not: -; SANDY: # %bb.0: -; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: notb %dil # sched: [1:0.33] -; SANDY-NEXT: notb (%r8) # sched: [7:1.00] -; SANDY-NEXT: notw %si # sched: [1:0.33] -; SANDY-NEXT: notw (%r9) # sched: [7:1.00] -; SANDY-NEXT: notl %edx # sched: [1:0.33] -; SANDY-NEXT: notl (%rax) # sched: [7:1.00] -; SANDY-NEXT: notq %rcx # sched: [1:0.33] -; SANDY-NEXT: notq (%r10) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_not: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: notb %dil # sched: [1:0.25] -; HASWELL-NEXT: notb (%r8) # sched: [7:1.00] -; HASWELL-NEXT: notw %si # sched: [1:0.25] -; HASWELL-NEXT: notw (%r9) # sched: [7:1.00] -; HASWELL-NEXT: notl %edx # sched: [1:0.25] -; HASWELL-NEXT: notl (%rax) # sched: [7:1.00] -; HASWELL-NEXT: notq %rcx # sched: [1:0.25] -; HASWELL-NEXT: notq (%r10) # sched: [7:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_not: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: notb %dil # sched: [1:0.25] -; BROADWELL-NEXT: notb (%r8) # sched: [7:1.00] -; BROADWELL-NEXT: notw %si # sched: [1:0.25] -; BROADWELL-NEXT: notw (%r9) # sched: [7:1.00] -; BROADWELL-NEXT: notl %edx # sched: [1:0.25] -; BROADWELL-NEXT: notl (%rax) # sched: [7:1.00] -; BROADWELL-NEXT: notq %rcx # sched: [1:0.25] -; BROADWELL-NEXT: notq (%r10) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_not: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: notb %dil # sched: [1:0.25] -; SKYLAKE-NEXT: notb (%r8) # sched: [7:1.00] -; SKYLAKE-NEXT: notw %si # sched: [1:0.25] -; SKYLAKE-NEXT: notw (%r9) # sched: [7:1.00] -; SKYLAKE-NEXT: notl %edx # sched: [1:0.25] -; SKYLAKE-NEXT: notl (%rax) # sched: [7:1.00] -; SKYLAKE-NEXT: notq %rcx # sched: [1:0.25] -; SKYLAKE-NEXT: notq (%r10) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_not: -; SKX: # %bb.0: -; SKX-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; SKX-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: notb %dil # sched: [1:0.25] -; SKX-NEXT: notb (%r8) # sched: [7:1.00] -; SKX-NEXT: notw %si # sched: [1:0.25] -; SKX-NEXT: notw (%r9) # sched: [7:1.00] -; SKX-NEXT: notl %edx # sched: [1:0.25] -; SKX-NEXT: notl (%rax) # sched: [7:1.00] -; SKX-NEXT: notq %rcx # sched: [1:0.25] -; SKX-NEXT: notq (%r10) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_not: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] -; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: notb %dil # sched: [1:0.50] -; BDVER2-NEXT: notb (%r8) # sched: [6:1.00] -; BDVER2-NEXT: notw %si # sched: [1:0.50] -; BDVER2-NEXT: notw (%r9) # sched: [6:1.00] -; BDVER2-NEXT: notl %edx # sched: [1:0.50] -; BDVER2-NEXT: notl (%rax) # sched: [6:1.00] -; BDVER2-NEXT: notq %rcx # sched: [1:0.50] -; BDVER2-NEXT: notq (%r10) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_not: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00] -; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: notb %dil # sched: [1:0.50] -; BTVER2-NEXT: notb (%r8) # sched: [5:1.00] -; BTVER2-NEXT: notw %si # sched: [1:0.50] -; BTVER2-NEXT: notw (%r9) # sched: [5:1.00] -; BTVER2-NEXT: notl %edx # sched: [1:0.50] -; BTVER2-NEXT: notl (%rax) # sched: [5:1.00] -; BTVER2-NEXT: notq %rcx # sched: [1:0.50] -; BTVER2-NEXT: notq (%r10) # sched: [5:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_not: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [8:0.50] -; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: notb %dil # sched: [1:0.25] -; ZNVER1-NEXT: notb (%r8) # sched: [5:0.50] -; ZNVER1-NEXT: notw %si # sched: [1:0.25] -; ZNVER1-NEXT: notw (%r9) # sched: [5:0.50] -; ZNVER1-NEXT: notl %edx # sched: [1:0.25] -; ZNVER1-NEXT: notl (%rax) # sched: [5:0.50] -; ZNVER1-NEXT: notq %rcx # sched: [1:0.25] -; ZNVER1-NEXT: notq (%r10) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "notb $0 \0A\09 notb $4 \0A\09 notw $1 \0A\09 notw $5 \0A\09 notl $2 \0A\09 notl $6 \0A\09 notq $3 \0A\09 notq $7", "r,r,r,r,*m,*m,*m,*m"(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) nounwind - ret void -} - -define void @test_or_8(i8 %a0, i8* %a1, i8 %a2) optsize { -; GENERIC-LABEL: test_or_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: orb $7, %al # sched: [1:0.33] -; GENERIC-NEXT: orb $7, %dil # sched: [1:0.33] -; GENERIC-NEXT: orb $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: orb %dl, %dil # sched: [1:0.33] -; GENERIC-NEXT: orb %dil, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: orb (%rsi), %dil # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_or_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: orb $7, %al # sched: [1:0.50] -; ATOM-NEXT: orb $7, %dil # sched: [1:0.50] -; ATOM-NEXT: orb $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: orb %dl, %dil # sched: [1:0.50] -; ATOM-NEXT: orb %dil, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: orb (%rsi), %dil # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_or_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: orb $7, %al # sched: [1:0.50] -; SLM-NEXT: orb $7, %dil # sched: [1:0.50] -; SLM-NEXT: orb $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: orb %dl, %dil # sched: [1:0.50] -; SLM-NEXT: orb %dil, (%rsi) # sched: [5:2.00] -; SLM-NEXT: orb (%rsi), %dil # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_or_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: orb $7, %al # sched: [1:0.33] -; SANDY-NEXT: orb $7, %dil # sched: [1:0.33] -; SANDY-NEXT: orb $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: orb %dl, %dil # sched: [1:0.33] -; SANDY-NEXT: orb %dil, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: orb (%rsi), %dil # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_or_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: orb $7, %al # sched: [1:0.25] -; HASWELL-NEXT: orb $7, %dil # sched: [1:0.25] -; HASWELL-NEXT: orb $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: orb %dl, %dil # sched: [1:0.25] -; HASWELL-NEXT: orb %dil, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: orb (%rsi), %dil # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_or_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: orb $7, %al # sched: [1:0.25] -; BROADWELL-NEXT: orb $7, %dil # sched: [1:0.25] -; BROADWELL-NEXT: orb $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: orb %dl, %dil # sched: [1:0.25] -; BROADWELL-NEXT: orb %dil, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: orb (%rsi), %dil # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_or_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: orb $7, %al # sched: [1:0.25] -; SKYLAKE-NEXT: orb $7, %dil # sched: [1:0.25] -; SKYLAKE-NEXT: orb $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: orb %dl, %dil # sched: [1:0.25] -; SKYLAKE-NEXT: orb %dil, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: orb (%rsi), %dil # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_or_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: orb $7, %al # sched: [1:0.25] -; SKX-NEXT: orb $7, %dil # sched: [1:0.25] -; SKX-NEXT: orb $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: orb %dl, %dil # sched: [1:0.25] -; SKX-NEXT: orb %dil, (%rsi) # sched: [7:1.00] -; SKX-NEXT: orb (%rsi), %dil # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_or_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: orb $7, %al # sched: [1:0.50] -; BDVER2-NEXT: orb $7, %dil # sched: [1:0.50] -; BDVER2-NEXT: orb $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: orb %dl, %dil # sched: [1:0.50] -; BDVER2-NEXT: orb %dil, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: orb (%rsi), %dil # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_or_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: orb $7, %al # sched: [1:0.50] -; BTVER2-NEXT: orb $7, %dil # sched: [1:0.50] -; BTVER2-NEXT: orb $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: orb %dl, %dil # sched: [1:0.50] -; BTVER2-NEXT: orb %dil, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: orb (%rsi), %dil # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_or_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: orb $7, %al # sched: [1:0.25] -; ZNVER1-NEXT: orb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: orb $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: orb %dl, %dil # sched: [1:0.25] -; ZNVER1-NEXT: orb %dil, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: orb (%rsi), %dil # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "orb $3, %AL \0A\09 orb $3, $0 \0A\09 orb $3, $2 \0A\09 orb $1, $0 \0A\09 orb $0, $2 \0A\09 orb $2, $0", "r,r,*m,i"(i8 %a0, i8 %a2, i8* %a1, i8 7) nounwind - ret void -} -define void @test_or_16(i16 %a0, i16* %a1, i16 %a2) optsize { -; GENERIC-LABEL: test_or_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: orw $511, %ax # imm = 0x1FF -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: orw $511, %di # imm = 0x1FF -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: orw $511, (%rsi) # imm = 0x1FF -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: orw $7, %di # sched: [1:0.33] -; GENERIC-NEXT: orw $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: orw %dx, %di # sched: [1:0.33] -; GENERIC-NEXT: orw %di, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: orw (%rsi), %di # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_or_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: orw $511, %ax # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: orw $511, %di # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: orw $511, (%rsi) # imm = 0x1FF -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: orw $7, %di # sched: [1:0.50] -; ATOM-NEXT: orw $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: orw %dx, %di # sched: [1:0.50] -; ATOM-NEXT: orw %di, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: orw (%rsi), %di # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_or_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: orw $511, %ax # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: orw $511, %di # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: orw $511, (%rsi) # imm = 0x1FF -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: orw $7, %di # sched: [1:0.50] -; SLM-NEXT: orw $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: orw %dx, %di # sched: [1:0.50] -; SLM-NEXT: orw %di, (%rsi) # sched: [5:2.00] -; SLM-NEXT: orw (%rsi), %di # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_or_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: orw $511, %ax # imm = 0x1FF -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: orw $511, %di # imm = 0x1FF -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: orw $511, (%rsi) # imm = 0x1FF -; SANDY-NEXT: # sched: [7:1.00] -; SANDY-NEXT: orw $7, %di # sched: [1:0.33] -; SANDY-NEXT: orw $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: orw %dx, %di # sched: [1:0.33] -; SANDY-NEXT: orw %di, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: orw (%rsi), %di # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_or_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: orw $511, %ax # imm = 0x1FF -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: orw $511, %di # imm = 0x1FF -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: orw $511, (%rsi) # imm = 0x1FF -; HASWELL-NEXT: # sched: [7:1.00] -; HASWELL-NEXT: orw $7, %di # sched: [1:0.25] -; HASWELL-NEXT: orw $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: orw %dx, %di # sched: [1:0.25] -; HASWELL-NEXT: orw %di, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: orw (%rsi), %di # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_or_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: orw $511, %ax # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: orw $511, %di # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: orw $511, (%rsi) # imm = 0x1FF -; BROADWELL-NEXT: # sched: [7:1.00] -; BROADWELL-NEXT: orw $7, %di # sched: [1:0.25] -; BROADWELL-NEXT: orw $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: orw %dx, %di # sched: [1:0.25] -; BROADWELL-NEXT: orw %di, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: orw (%rsi), %di # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_or_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: orw $511, %ax # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: orw $511, %di # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: orw $511, (%rsi) # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [7:1.00] -; SKYLAKE-NEXT: orw $7, %di # sched: [1:0.25] -; SKYLAKE-NEXT: orw $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: orw %dx, %di # sched: [1:0.25] -; SKYLAKE-NEXT: orw %di, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: orw (%rsi), %di # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_or_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: orw $511, %ax # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: orw $511, %di # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: orw $511, (%rsi) # imm = 0x1FF -; SKX-NEXT: # sched: [7:1.00] -; SKX-NEXT: orw $7, %di # sched: [1:0.25] -; SKX-NEXT: orw $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: orw %dx, %di # sched: [1:0.25] -; SKX-NEXT: orw %di, (%rsi) # sched: [7:1.00] -; SKX-NEXT: orw (%rsi), %di # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_or_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: orw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: orw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: orw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: orw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: orw $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: orw %dx, %di # sched: [1:0.50] -; BDVER2-NEXT: orw %di, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: orw (%rsi), %di # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_or_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: orw $511, %ax # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: orw $511, %di # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: orw $511, (%rsi) # imm = 0x1FF -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: orw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: orw $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: orw %dx, %di # sched: [1:0.50] -; BTVER2-NEXT: orw %di, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: orw (%rsi), %di # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_or_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: orw $511, %ax # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: orw $511, %di # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: orw $511, (%rsi) # imm = 0x1FF -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: orw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: orw $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: orw %dx, %di # sched: [1:0.25] -; ZNVER1-NEXT: orw %di, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: orw (%rsi), %di # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "orw $3, %AX \0A\09 orw $3, $0 \0A\09 orw $3, $2 \0A\09 orw $4, $0 \0A\09 orw $4, $2 \0A\09 orw $1, $0 \0A\09 orw $0, $2 \0A\09 orw $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind - ret void -} -define void @test_or_32(i32 %a0, i32* %a1, i32 %a2) optsize { -; GENERIC-LABEL: test_or_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: orl $665536, %eax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: orl $665536, %edi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: orl $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: orl $7, %edi # sched: [1:0.33] -; GENERIC-NEXT: orl $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: orl %edx, %edi # sched: [1:0.33] -; GENERIC-NEXT: orl %edi, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: orl (%rsi), %edi # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_or_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: orl $665536, %eax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: orl $665536, %edi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: orl $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: orl $7, %edi # sched: [1:0.50] -; ATOM-NEXT: orl $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: orl %edx, %edi # sched: [1:0.50] -; ATOM-NEXT: orl %edi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: orl (%rsi), %edi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_or_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: orl $665536, %eax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: orl $665536, %edi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: orl $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: orl $7, %edi # sched: [1:0.50] -; SLM-NEXT: orl $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: orl %edx, %edi # sched: [1:0.50] -; SLM-NEXT: orl %edi, (%rsi) # sched: [5:2.00] -; SLM-NEXT: orl (%rsi), %edi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_or_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: orl $665536, %eax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: orl $665536, %edi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: orl $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [7:1.00] -; SANDY-NEXT: orl $7, %edi # sched: [1:0.33] -; SANDY-NEXT: orl $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: orl %edx, %edi # sched: [1:0.33] -; SANDY-NEXT: orl %edi, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: orl (%rsi), %edi # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_or_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: orl $665536, %eax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: orl $665536, %edi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: orl $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [7:1.00] -; HASWELL-NEXT: orl $7, %edi # sched: [1:0.25] -; HASWELL-NEXT: orl $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: orl %edx, %edi # sched: [1:0.25] -; HASWELL-NEXT: orl %edi, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: orl (%rsi), %edi # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_or_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: orl $665536, %eax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: orl $665536, %edi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: orl $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [7:1.00] -; BROADWELL-NEXT: orl $7, %edi # sched: [1:0.25] -; BROADWELL-NEXT: orl $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: orl %edx, %edi # sched: [1:0.25] -; BROADWELL-NEXT: orl %edi, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: orl (%rsi), %edi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_or_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: orl $665536, %eax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: orl $665536, %edi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: orl $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [7:1.00] -; SKYLAKE-NEXT: orl $7, %edi # sched: [1:0.25] -; SKYLAKE-NEXT: orl $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: orl %edx, %edi # sched: [1:0.25] -; SKYLAKE-NEXT: orl %edi, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: orl (%rsi), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_or_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: orl $665536, %eax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: orl $665536, %edi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: orl $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [7:1.00] -; SKX-NEXT: orl $7, %edi # sched: [1:0.25] -; SKX-NEXT: orl $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: orl %edx, %edi # sched: [1:0.25] -; SKX-NEXT: orl %edi, (%rsi) # sched: [7:1.00] -; SKX-NEXT: orl (%rsi), %edi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_or_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: orl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: orl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: orl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: orl $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: orl $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: orl %edx, %edi # sched: [1:0.50] -; BDVER2-NEXT: orl %edi, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: orl (%rsi), %edi # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_or_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: orl $665536, %eax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: orl $665536, %edi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: orl $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: orl $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: orl $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: orl %edx, %edi # sched: [1:0.50] -; BTVER2-NEXT: orl %edi, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: orl (%rsi), %edi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_or_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: orl $665536, %eax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: orl $665536, %edi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: orl $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: orl $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: orl $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: orl %edx, %edi # sched: [1:0.25] -; ZNVER1-NEXT: orl %edi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: orl (%rsi), %edi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "orl $3, %EAX \0A\09 orl $3, $0 \0A\09 orl $3, $2 \0A\09 orl $4, $0 \0A\09 orl $4, $2 \0A\09 orl $1, $0 \0A\09 orl $0, $2 \0A\09 orl $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind - ret void -} -define void @test_or_64(i64 %a0, i64* %a1, i64 %a2) optsize { -; GENERIC-LABEL: test_or_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: orq $665536, %rax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: orq $665536, %rdi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: orq $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: orq $7, %rdi # sched: [1:0.33] -; GENERIC-NEXT: orq $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: orq %rdx, %rdi # sched: [1:0.33] -; GENERIC-NEXT: orq %rdi, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: orq (%rsi), %rdi # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_or_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: orq $665536, %rax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: orq $665536, %rdi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: orq $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: orq $7, %rdi # sched: [1:0.50] -; ATOM-NEXT: orq $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: orq %rdx, %rdi # sched: [1:0.50] -; ATOM-NEXT: orq %rdi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: orq (%rsi), %rdi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_or_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: orq $665536, %rax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: orq $665536, %rdi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: orq $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: orq $7, %rdi # sched: [1:0.50] -; SLM-NEXT: orq $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: orq %rdx, %rdi # sched: [1:0.50] -; SLM-NEXT: orq %rdi, (%rsi) # sched: [5:2.00] -; SLM-NEXT: orq (%rsi), %rdi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_or_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: orq $665536, %rax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: orq $665536, %rdi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: orq $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [7:1.00] -; SANDY-NEXT: orq $7, %rdi # sched: [1:0.33] -; SANDY-NEXT: orq $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: orq %rdx, %rdi # sched: [1:0.33] -; SANDY-NEXT: orq %rdi, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: orq (%rsi), %rdi # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_or_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: orq $665536, %rax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: orq $665536, %rdi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: orq $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [7:1.00] -; HASWELL-NEXT: orq $7, %rdi # sched: [1:0.25] -; HASWELL-NEXT: orq $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: orq %rdx, %rdi # sched: [1:0.25] -; HASWELL-NEXT: orq %rdi, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: orq (%rsi), %rdi # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_or_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: orq $665536, %rax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: orq $665536, %rdi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: orq $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [7:1.00] -; BROADWELL-NEXT: orq $7, %rdi # sched: [1:0.25] -; BROADWELL-NEXT: orq $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: orq %rdx, %rdi # sched: [1:0.25] -; BROADWELL-NEXT: orq %rdi, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: orq (%rsi), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_or_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: orq $665536, %rax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: orq $665536, %rdi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: orq $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [7:1.00] -; SKYLAKE-NEXT: orq $7, %rdi # sched: [1:0.25] -; SKYLAKE-NEXT: orq $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: orq %rdx, %rdi # sched: [1:0.25] -; SKYLAKE-NEXT: orq %rdi, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: orq (%rsi), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_or_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: orq $665536, %rax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: orq $665536, %rdi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: orq $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [7:1.00] -; SKX-NEXT: orq $7, %rdi # sched: [1:0.25] -; SKX-NEXT: orq $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: orq %rdx, %rdi # sched: [1:0.25] -; SKX-NEXT: orq %rdi, (%rsi) # sched: [7:1.00] -; SKX-NEXT: orq (%rsi), %rdi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_or_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: orq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: orq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: orq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: orq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: orq $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: orq %rdx, %rdi # sched: [1:0.50] -; BDVER2-NEXT: orq %rdi, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: orq (%rsi), %rdi # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_or_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: orq $665536, %rax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: orq $665536, %rdi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: orq $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: orq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: orq $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: orq %rdx, %rdi # sched: [1:0.50] -; BTVER2-NEXT: orq %rdi, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: orq (%rsi), %rdi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_or_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: orq $665536, %rax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: orq $665536, %rdi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: orq $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: orq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: orq $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: orq %rdx, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: orq %rdi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: orq (%rsi), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "orq $3, %RAX \0A\09 orq $3, $0 \0A\09 orq $3, $2 \0A\09 orq $4, $0 \0A\09 orq $4, $2 \0A\09 orq $1, $0 \0A\09 orq $0, $2 \0A\09 orq $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind - ret void -} - -define void @test_out() optsize { -; GENERIC-LABEL: test_out: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: outb %al, $7 # sched: [100:0.33] -; GENERIC-NEXT: outw %ax, $7 # sched: [100:0.33] -; GENERIC-NEXT: outl %eax, $7 # sched: [100:0.33] -; GENERIC-NEXT: outb %al, %dx # sched: [100:0.33] -; GENERIC-NEXT: outw %ax, %dx # sched: [100:0.33] -; GENERIC-NEXT: outl %eax, %dx # sched: [100:0.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_out: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: outb %al, $7 # sched: [72:36.00] -; ATOM-NEXT: outw %ax, $7 # sched: [72:36.00] -; ATOM-NEXT: outl %eax, $7 # sched: [72:36.00] -; ATOM-NEXT: outb %al, %dx # sched: [68:34.00] -; ATOM-NEXT: outw %ax, %dx # sched: [68:34.00] -; ATOM-NEXT: outl %eax, %dx # sched: [68:34.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_out: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: outb %al, $7 # sched: [100:1.00] -; SLM-NEXT: outw %ax, $7 # sched: [100:1.00] -; SLM-NEXT: outl %eax, $7 # sched: [100:1.00] -; SLM-NEXT: outb %al, %dx # sched: [100:1.00] -; SLM-NEXT: outw %ax, %dx # sched: [100:1.00] -; SLM-NEXT: outl %eax, %dx # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_out: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: outb %al, $7 # sched: [100:0.33] -; SANDY-NEXT: outw %ax, $7 # sched: [100:0.33] -; SANDY-NEXT: outl %eax, $7 # sched: [100:0.33] -; SANDY-NEXT: outb %al, %dx # sched: [100:0.33] -; SANDY-NEXT: outw %ax, %dx # sched: [100:0.33] -; SANDY-NEXT: outl %eax, %dx # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_out: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: outb %al, $7 # sched: [36:5.00] -; HASWELL-NEXT: outw %ax, $7 # sched: [36:5.00] -; HASWELL-NEXT: outl %eax, $7 # sched: [36:5.00] -; HASWELL-NEXT: outb %al, %dx # sched: [36:5.00] -; HASWELL-NEXT: outw %ax, %dx # sched: [36:5.00] -; HASWELL-NEXT: outl %eax, %dx # sched: [36:5.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_out: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: outb %al, $7 # sched: [35:5.00] -; BROADWELL-NEXT: outw %ax, $7 # sched: [35:5.00] -; BROADWELL-NEXT: outl %eax, $7 # sched: [35:5.00] -; BROADWELL-NEXT: outb %al, %dx # sched: [35:5.00] -; BROADWELL-NEXT: outw %ax, %dx # sched: [35:5.00] -; BROADWELL-NEXT: outl %eax, %dx # sched: [35:5.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_out: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: outb %al, $7 # sched: [35:5.00] -; SKYLAKE-NEXT: outw %ax, $7 # sched: [35:5.00] -; SKYLAKE-NEXT: outl %eax, $7 # sched: [35:5.00] -; SKYLAKE-NEXT: outb %al, %dx # sched: [35:5.00] -; SKYLAKE-NEXT: outw %ax, %dx # sched: [35:5.00] -; SKYLAKE-NEXT: outl %eax, %dx # sched: [35:5.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_out: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: outb %al, $7 # sched: [35:5.00] -; SKX-NEXT: outw %ax, $7 # sched: [35:5.00] -; SKX-NEXT: outl %eax, $7 # sched: [35:5.00] -; SKX-NEXT: outb %al, %dx # sched: [35:5.00] -; SKX-NEXT: outw %ax, %dx # sched: [35:5.00] -; SKX-NEXT: outl %eax, %dx # sched: [35:5.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_out: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: outb %al, $7 # sched: [100:0.50] -; BDVER2-NEXT: outw %ax, $7 # sched: [100:0.50] -; BDVER2-NEXT: outl %eax, $7 # sched: [100:0.50] -; BDVER2-NEXT: outb %al, %dx # sched: [100:0.50] -; BDVER2-NEXT: outw %ax, %dx # sched: [100:0.50] -; BDVER2-NEXT: outl %eax, %dx # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_out: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: outb %al, $7 # sched: [100:0.50] -; BTVER2-NEXT: outw %ax, $7 # sched: [100:0.50] -; BTVER2-NEXT: outl %eax, $7 # sched: [100:0.50] -; BTVER2-NEXT: outb %al, %dx # sched: [100:0.50] -; BTVER2-NEXT: outw %ax, %dx # sched: [100:0.50] -; BTVER2-NEXT: outl %eax, %dx # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_out: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: outb %al, $7 # sched: [100:0.25] -; ZNVER1-NEXT: outw %ax, $7 # sched: [100:0.25] -; ZNVER1-NEXT: outl %eax, $7 # sched: [100:0.25] -; ZNVER1-NEXT: outb %al, %dx # sched: [100:0.25] -; ZNVER1-NEXT: outw %ax, %dx # sched: [100:0.25] -; ZNVER1-NEXT: outl %eax, %dx # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "outb %AL, $0 \0A\09 outw %AX, $0 \0A\09 outl %EAX, $0 \0A\09 outb %AL, %DX \0A\09 outw %AX, %DX \0A\09 outl %EAX, %DX", "i"(i8 7) nounwind - ret void -} - -define void @test_outs() optsize { -; GENERIC-LABEL: test_outs: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: outsb (%rsi), %dx # sched: [100:0.33] -; GENERIC-NEXT: outsw (%rsi), %dx # sched: [100:0.33] -; GENERIC-NEXT: outsl (%rsi), %dx # sched: [100:0.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_outs: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: outsb (%rsi), %dx # sched: [74:37.00] -; ATOM-NEXT: outsw (%rsi), %dx # sched: [74:37.00] -; ATOM-NEXT: outsl (%rsi), %dx # sched: [74:37.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_outs: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: outsb (%rsi), %dx # sched: [100:1.00] -; SLM-NEXT: outsw (%rsi), %dx # sched: [100:1.00] -; SLM-NEXT: outsl (%rsi), %dx # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_outs: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: outsb (%rsi), %dx # sched: [100:0.33] -; SANDY-NEXT: outsw (%rsi), %dx # sched: [100:0.33] -; SANDY-NEXT: outsl (%rsi), %dx # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_outs: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: outsb (%rsi), %dx # sched: [100:0.25] -; HASWELL-NEXT: outsw (%rsi), %dx # sched: [100:0.25] -; HASWELL-NEXT: outsl (%rsi), %dx # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_outs: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: outsb (%rsi), %dx # sched: [100:0.25] -; BROADWELL-NEXT: outsw (%rsi), %dx # sched: [100:0.25] -; BROADWELL-NEXT: outsl (%rsi), %dx # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_outs: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: outsb (%rsi), %dx # sched: [100:0.25] -; SKYLAKE-NEXT: outsw (%rsi), %dx # sched: [100:0.25] -; SKYLAKE-NEXT: outsl (%rsi), %dx # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_outs: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: outsb (%rsi), %dx # sched: [100:0.25] -; SKX-NEXT: outsw (%rsi), %dx # sched: [100:0.25] -; SKX-NEXT: outsl (%rsi), %dx # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_outs: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: outsb (%rsi), %dx # sched: [100:0.50] -; BDVER2-NEXT: outsw (%rsi), %dx # sched: [100:0.50] -; BDVER2-NEXT: outsl (%rsi), %dx # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_outs: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: outsb (%rsi), %dx # sched: [100:0.50] -; BTVER2-NEXT: outsw (%rsi), %dx # sched: [100:0.50] -; BTVER2-NEXT: outsl (%rsi), %dx # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_outs: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: outsb (%rsi), %dx # sched: [100:0.25] -; ZNVER1-NEXT: outsw (%rsi), %dx # sched: [100:0.25] -; ZNVER1-NEXT: outsl (%rsi), %dx # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "outsb \0A\09 outsw \0A\09 outsl", ""() - ret void -} - -define void @test_pause() optsize { -; GENERIC-LABEL: test_pause: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: pause # sched: [4:1.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pause: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: pause # sched: [17:8.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pause: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: pause # sched: [1:0.50] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pause: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: pause # sched: [4:1.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pause: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: pause # sched: [5:1.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pause: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: pause # sched: [5:1.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pause: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: pause # sched: [4:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pause: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: pause # sched: [140:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pause: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: pause # sched: [1:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pause: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: pause # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pause: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: pause # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "pause", ""() - ret void -} - -define void @test_pop_push() optsize { -; GENERIC-LABEL: test_pop_push: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: popq %fs # sched: [100:0.33] -; GENERIC-NEXT: popq %gs # sched: [100:0.33] -; GENERIC-NEXT: pushq %fs # sched: [3:1.00] -; GENERIC-NEXT: pushq %gs # sched: [5:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pop_push: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: popq %fs # sched: [29:14.50] -; ATOM-NEXT: popq %gs # sched: [29:14.50] -; ATOM-NEXT: pushq %fs # sched: [2:1.00] -; ATOM-NEXT: pushq %gs # sched: [2:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pop_push: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: popq %fs # sched: [100:1.00] -; SLM-NEXT: popq %gs # sched: [100:1.00] -; SLM-NEXT: pushq %fs # sched: [100:1.00] -; SLM-NEXT: pushq %gs # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pop_push: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: popq %fs # sched: [100:0.33] -; SANDY-NEXT: popq %gs # sched: [100:0.33] -; SANDY-NEXT: pushq %fs # sched: [3:1.00] -; SANDY-NEXT: pushq %gs # sched: [5:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pop_push: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: popq %fs # sched: [100:0.25] -; HASWELL-NEXT: popq %gs # sched: [100:0.25] -; HASWELL-NEXT: pushq %fs # sched: [100:0.25] -; HASWELL-NEXT: pushq %gs # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pop_push: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: popq %fs # sched: [100:0.25] -; BROADWELL-NEXT: popq %gs # sched: [100:0.25] -; BROADWELL-NEXT: pushq %fs # sched: [100:0.25] -; BROADWELL-NEXT: pushq %gs # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pop_push: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: popq %fs # sched: [100:0.25] -; SKYLAKE-NEXT: popq %gs # sched: [100:0.25] -; SKYLAKE-NEXT: pushq %fs # sched: [100:0.25] -; SKYLAKE-NEXT: pushq %gs # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pop_push: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: popq %fs # sched: [100:0.25] -; SKX-NEXT: popq %gs # sched: [100:0.25] -; SKX-NEXT: pushq %fs # sched: [100:0.25] -; SKX-NEXT: pushq %gs # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pop_push: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: popq %fs # sched: [100:0.50] -; BDVER2-NEXT: popq %gs # sched: [100:0.50] -; BDVER2-NEXT: pushq %fs # sched: [100:0.50] -; BDVER2-NEXT: pushq %gs # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pop_push: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: popq %fs # sched: [100:0.50] -; BTVER2-NEXT: popq %gs # sched: [100:0.50] -; BTVER2-NEXT: pushq %fs # sched: [100:0.50] -; BTVER2-NEXT: pushq %gs # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pop_push: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: popq %fs # sched: [100:0.25] -; ZNVER1-NEXT: popq %gs # sched: [100:0.25] -; ZNVER1-NEXT: pushq %fs # sched: [100:0.25] -; ZNVER1-NEXT: pushq %gs # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "pop %FS \0A\09 pop %GS \0A\09 push %FS \0A\09 push %GS", ""() - ret void -} -define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize { -; GENERIC-LABEL: test_pop_push_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: popw %ax # sched: [6:0.50] -; GENERIC-NEXT: popw (%rsi) # sched: [6:0.50] -; GENERIC-NEXT: pushw %di # sched: [5:1.00] -; GENERIC-NEXT: pushw (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: pushw $4095 # imm = 0xFFF -; GENERIC-NEXT: # sched: [1:1.00] -; GENERIC-NEXT: pushw $7 # sched: [1:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pop_push_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: popw %ax # sched: [2:1.00] -; ATOM-NEXT: popw (%rsi) # sched: [3:1.50] -; ATOM-NEXT: pushw %di # sched: [1:1.00] -; ATOM-NEXT: pushw (%rsi) # sched: [2:1.00] -; ATOM-NEXT: pushw $4095 # imm = 0xFFF -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: pushw $7 # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pop_push_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: popw %ax # sched: [3:1.00] -; SLM-NEXT: popw (%rsi) # sched: [4:2.00] -; SLM-NEXT: pushw %di # sched: [1:1.00] -; SLM-NEXT: pushw (%rsi) # sched: [4:2.00] -; SLM-NEXT: pushw $4095 # imm = 0xFFF -; SLM-NEXT: # sched: [1:1.00] -; SLM-NEXT: pushw $7 # sched: [1:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pop_push_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: popw %ax # sched: [6:0.50] -; SANDY-NEXT: popw (%rsi) # sched: [6:0.50] -; SANDY-NEXT: pushw %di # sched: [5:1.00] -; SANDY-NEXT: pushw (%rsi) # sched: [5:1.00] -; SANDY-NEXT: pushw $4095 # imm = 0xFFF -; SANDY-NEXT: # sched: [1:1.00] -; SANDY-NEXT: pushw $7 # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pop_push_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: popw %ax # sched: [6:0.50] -; HASWELL-NEXT: popw (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: pushw %di # sched: [2:1.00] -; HASWELL-NEXT: pushw (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: pushw $4095 # imm = 0xFFF -; HASWELL-NEXT: # sched: [1:1.00] -; HASWELL-NEXT: pushw $7 # sched: [1:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pop_push_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: popw %ax # sched: [6:0.50] -; BROADWELL-NEXT: popw (%rsi) # sched: [6:1.00] -; BROADWELL-NEXT: pushw %di # sched: [2:1.00] -; BROADWELL-NEXT: pushw (%rsi) # sched: [6:1.00] -; BROADWELL-NEXT: pushw $4095 # imm = 0xFFF -; BROADWELL-NEXT: # sched: [1:1.00] -; BROADWELL-NEXT: pushw $7 # sched: [1:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pop_push_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: popw %ax # sched: [6:0.50] -; SKYLAKE-NEXT: popw (%rsi) # sched: [6:1.00] -; SKYLAKE-NEXT: pushw %di # sched: [2:1.00] -; SKYLAKE-NEXT: pushw (%rsi) # sched: [6:1.00] -; SKYLAKE-NEXT: pushw $4095 # imm = 0xFFF -; SKYLAKE-NEXT: # sched: [1:1.00] -; SKYLAKE-NEXT: pushw $7 # sched: [1:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pop_push_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: popw %ax # sched: [6:0.50] -; SKX-NEXT: popw (%rsi) # sched: [6:1.00] -; SKX-NEXT: pushw %di # sched: [2:1.00] -; SKX-NEXT: pushw (%rsi) # sched: [6:1.00] -; SKX-NEXT: pushw $4095 # imm = 0xFFF -; SKX-NEXT: # sched: [1:1.00] -; SKX-NEXT: pushw $7 # sched: [1:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pop_push_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: popw %ax # sched: [5:0.50] -; BDVER2-NEXT: popw (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: pushw %di # sched: [1:1.00] -; BDVER2-NEXT: pushw (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: pushw $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: pushw $7 # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pop_push_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: popw %ax # sched: [3:1.00] -; BTVER2-NEXT: popw (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: pushw %di # sched: [1:1.00] -; BTVER2-NEXT: pushw (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: pushw $4095 # imm = 0xFFF -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: pushw $7 # sched: [1:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pop_push_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: popw %ax # sched: [8:0.50] -; ZNVER1-NEXT: popw (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: pushw %di # sched: [1:0.50] -; ZNVER1-NEXT: pushw (%rsi) # sched: [4:0.50] -; ZNVER1-NEXT: pushw $4095 # imm = 0xFFF -; ZNVER1-NEXT: # sched: [1:0.50] -; ZNVER1-NEXT: pushw $7 # sched: [1:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i16 asm sideeffect "popw $0 \0A\09 popw $2 \0A\09 pushw $1 \0A\09 pushw $2 \0A\09 pushw $3 \0A\09 pushw $4", "=r,r,*m,i,i"(i16 %a0, i16 *%a1, i16 4095, i8 7) - ret i16 %1 -} -define i64 @test_pop_push_64(i64 %a0, i64 *%a1) optsize { -; GENERIC-LABEL: test_pop_push_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: popq %rax # sched: [6:0.50] -; GENERIC-NEXT: popq (%rsi) # sched: [6:0.50] -; GENERIC-NEXT: pushq %rdi # sched: [5:1.00] -; GENERIC-NEXT: pushq (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: pushq $4095 # imm = 0xFFF -; GENERIC-NEXT: # sched: [1:1.00] -; GENERIC-NEXT: pushq $7 # sched: [5:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pop_push_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: popq %rax # sched: [1:1.00] -; ATOM-NEXT: popq (%rsi) # sched: [3:1.50] -; ATOM-NEXT: pushq %rdi # sched: [1:1.00] -; ATOM-NEXT: pushq (%rsi) # sched: [2:1.00] -; ATOM-NEXT: pushq $4095 # imm = 0xFFF -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: pushq $7 # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pop_push_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: popq %rax # sched: [3:1.00] -; SLM-NEXT: popq (%rsi) # sched: [4:2.00] -; SLM-NEXT: pushq %rdi # sched: [1:1.00] -; SLM-NEXT: pushq (%rsi) # sched: [4:2.00] -; SLM-NEXT: pushq $4095 # imm = 0xFFF -; SLM-NEXT: # sched: [1:1.00] -; SLM-NEXT: pushq $7 # sched: [1:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_pop_push_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: popq %rax # sched: [6:0.50] -; SANDY-NEXT: popq (%rsi) # sched: [6:0.50] -; SANDY-NEXT: pushq %rdi # sched: [5:1.00] -; SANDY-NEXT: pushq (%rsi) # sched: [5:1.00] -; SANDY-NEXT: pushq $4095 # imm = 0xFFF -; SANDY-NEXT: # sched: [1:1.00] -; SANDY-NEXT: pushq $7 # sched: [5:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_pop_push_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: popq %rax # sched: [6:0.50] -; HASWELL-NEXT: popq (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: pushq %rdi # sched: [2:1.00] -; HASWELL-NEXT: pushq (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: pushq $4095 # imm = 0xFFF -; HASWELL-NEXT: # sched: [1:1.00] -; HASWELL-NEXT: pushq $7 # sched: [2:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pop_push_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: popq %rax # sched: [6:0.50] -; BROADWELL-NEXT: popq (%rsi) # sched: [6:1.00] -; BROADWELL-NEXT: pushq %rdi # sched: [2:1.00] -; BROADWELL-NEXT: pushq (%rsi) # sched: [6:1.00] -; BROADWELL-NEXT: pushq $4095 # imm = 0xFFF -; BROADWELL-NEXT: # sched: [1:1.00] -; BROADWELL-NEXT: pushq $7 # sched: [2:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pop_push_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: popq %rax # sched: [6:0.50] -; SKYLAKE-NEXT: popq (%rsi) # sched: [6:1.00] -; SKYLAKE-NEXT: pushq %rdi # sched: [2:1.00] -; SKYLAKE-NEXT: pushq (%rsi) # sched: [6:1.00] -; SKYLAKE-NEXT: pushq $4095 # imm = 0xFFF -; SKYLAKE-NEXT: # sched: [1:1.00] -; SKYLAKE-NEXT: pushq $7 # sched: [2:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pop_push_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: popq %rax # sched: [6:0.50] -; SKX-NEXT: popq (%rsi) # sched: [6:1.00] -; SKX-NEXT: pushq %rdi # sched: [2:1.00] -; SKX-NEXT: pushq (%rsi) # sched: [6:1.00] -; SKX-NEXT: pushq $4095 # imm = 0xFFF -; SKX-NEXT: # sched: [1:1.00] -; SKX-NEXT: pushq $7 # sched: [2:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_pop_push_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: popq %rax # sched: [5:0.50] -; BDVER2-NEXT: popq (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: pushq %rdi # sched: [1:1.00] -; BDVER2-NEXT: pushq (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: pushq $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: pushq $7 # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_pop_push_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: popq %rax # sched: [3:1.00] -; BTVER2-NEXT: popq (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: pushq %rdi # sched: [1:1.00] -; BTVER2-NEXT: pushq (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: pushq $4095 # imm = 0xFFF -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: pushq $7 # sched: [1:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_pop_push_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: popq %rax # sched: [8:0.50] -; ZNVER1-NEXT: popq (%rsi) # sched: [9:1.00] -; ZNVER1-NEXT: pushq %rdi # sched: [1:0.50] -; ZNVER1-NEXT: pushq (%rsi) # sched: [9:1.00] -; ZNVER1-NEXT: pushq $4095 # imm = 0xFFF -; ZNVER1-NEXT: # sched: [1:0.50] -; ZNVER1-NEXT: pushq $7 # sched: [1:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i64 asm sideeffect "popq $0 \0A\09 popq $2 \0A\09 pushq $1 \0A\09 pushq $2 \0A\09 pushq $3 \0A\09 pushq $4", "=r,r,*m,i,i"(i64 %a0, i64 *%a1, i64 4095, i8 7) - ret i64 %1 -} - -define void @test_popf_pushf() optsize { -; GENERIC-LABEL: test_popf_pushf: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: popfq # sched: [5:0.50] -; GENERIC-NEXT: pushfq # sched: [5:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_popf_pushf: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: popfq # sched: [26:13.00] -; ATOM-NEXT: pushfq # sched: [9:4.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_popf_pushf: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: popfq # sched: [3:1.00] -; SLM-NEXT: pushfq # sched: [1:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_popf_pushf: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: popfq # sched: [5:0.50] -; SANDY-NEXT: pushfq # sched: [5:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_popf_pushf: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: popfq # sched: [5:0.50] -; HASWELL-NEXT: pushfq # sched: [5:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_popf_pushf: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: popfq # sched: [22:4.25] -; BROADWELL-NEXT: pushfq # sched: [5:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_popf_pushf: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: popfq # sched: [5:0.50] -; SKYLAKE-NEXT: pushfq # sched: [5:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_popf_pushf: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: popfq # sched: [5:0.50] -; SKX-NEXT: pushfq # sched: [5:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_popf_pushf: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: popfq # sched: [5:0.50] -; BDVER2-NEXT: pushfq # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_popf_pushf: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: popfq # sched: [3:1.00] -; BTVER2-NEXT: pushfq # sched: [1:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_popf_pushf: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: popfq # sched: [8:0.50] -; ZNVER1-NEXT: pushfq # sched: [1:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "popf \0A\09 pushf", ""() - ret void -} - -define void @test_rcl_rcr_8(i8 %a0, i8 %a1, i8 *%a2) optsize { -; GENERIC-LABEL: test_rcl_rcr_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: rclb %dil # sched: [2:1.50] -; GENERIC-NEXT: rcrb %dil # sched: [2:1.50] -; GENERIC-NEXT: rclb (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rcrb (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rclb $7, %dil # sched: [5:4.00] -; GENERIC-NEXT: rcrb $7, %dil # sched: [5:4.00] -; GENERIC-NEXT: rclb $7, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rcrb $7, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rclb %cl, %dil # sched: [5:4.00] -; GENERIC-NEXT: rcrb %cl, %dil # sched: [5:4.00] -; GENERIC-NEXT: rclb %cl, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rcrb %cl, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_rcl_rcr_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: rclb %dil # sched: [1:1.00] -; ATOM-NEXT: rcrb %dil # sched: [1:1.00] -; ATOM-NEXT: rclb (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rcrb (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rclb $7, %dil # sched: [1:1.00] -; ATOM-NEXT: rcrb $7, %dil # sched: [1:1.00] -; ATOM-NEXT: rclb $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rcrb $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rclb %cl, %dil # sched: [1:1.00] -; ATOM-NEXT: rcrb %cl, %dil # sched: [1:1.00] -; ATOM-NEXT: rclb %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rcrb %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_rcl_rcr_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: rclb %dil # sched: [1:1.00] -; SLM-NEXT: rcrb %dil # sched: [1:1.00] -; SLM-NEXT: rclb (%rdx) # sched: [4:2.00] -; SLM-NEXT: rcrb (%rdx) # sched: [4:2.00] -; SLM-NEXT: rclb $7, %dil # sched: [1:1.00] -; SLM-NEXT: rcrb $7, %dil # sched: [1:1.00] -; SLM-NEXT: rclb $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rcrb $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rclb %cl, %dil # sched: [1:1.00] -; SLM-NEXT: rcrb %cl, %dil # sched: [1:1.00] -; SLM-NEXT: rclb %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rcrb %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_rcl_rcr_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: rclb %dil # sched: [2:1.50] -; SANDY-NEXT: rcrb %dil # sched: [2:1.50] -; SANDY-NEXT: rclb (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rcrb (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rclb $7, %dil # sched: [5:4.00] -; SANDY-NEXT: rcrb $7, %dil # sched: [5:4.00] -; SANDY-NEXT: rclb $7, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rcrb $7, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rclb %cl, %dil # sched: [5:4.00] -; SANDY-NEXT: rcrb %cl, %dil # sched: [5:4.00] -; SANDY-NEXT: rclb %cl, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rcrb %cl, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rcl_rcr_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: rclb %dil # sched: [3:0.75] -; HASWELL-NEXT: rcrb %dil # sched: [3:0.75] -; HASWELL-NEXT: rclb (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rcrb (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rclb $7, %dil # sched: [3:0.75] -; HASWELL-NEXT: rcrb $7, %dil # sched: [3:0.75] -; HASWELL-NEXT: rclb $7, (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rcrb $7, (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rclb %cl, %dil # sched: [11:2.25] -; HASWELL-NEXT: rcrb %cl, %dil # sched: [14:2.50] -; HASWELL-NEXT: rclb %cl, (%rdx) # sched: [16:2.00] -; HASWELL-NEXT: rcrb %cl, (%rdx) # sched: [19:2.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rcl_rcr_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: rclb %dil # sched: [3:0.75] -; BROADWELL-NEXT: rcrb %dil # sched: [3:0.75] -; BROADWELL-NEXT: rclb (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rcrb (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rclb $7, %dil # sched: [3:0.75] -; BROADWELL-NEXT: rcrb $7, %dil # sched: [3:0.75] -; BROADWELL-NEXT: rclb $7, (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rcrb $7, (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rclb %cl, %dil # sched: [11:2.25] -; BROADWELL-NEXT: rcrb %cl, %dil # sched: [14:2.50] -; BROADWELL-NEXT: rclb %cl, (%rdx) # sched: [15:2.00] -; BROADWELL-NEXT: rcrb %cl, (%rdx) # sched: [18:2.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rcl_rcr_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: rclb %dil # sched: [3:0.75] -; SKYLAKE-NEXT: rcrb %dil # sched: [3:0.75] -; SKYLAKE-NEXT: rclb (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rcrb (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rclb $7, %dil # sched: [3:0.75] -; SKYLAKE-NEXT: rcrb $7, %dil # sched: [3:0.75] -; SKYLAKE-NEXT: rclb $7, (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rcrb $7, (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rclb %cl, %dil # sched: [11:2.50] -; SKYLAKE-NEXT: rcrb %cl, %dil # sched: [14:2.50] -; SKYLAKE-NEXT: rclb %cl, (%rdx) # sched: [15:2.50] -; SKYLAKE-NEXT: rcrb %cl, (%rdx) # sched: [18:2.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rcl_rcr_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: rclb %dil # sched: [3:0.75] -; SKX-NEXT: rcrb %dil # sched: [3:0.75] -; SKX-NEXT: rclb (%rdx) # sched: [8:0.75] -; SKX-NEXT: rcrb (%rdx) # sched: [8:0.75] -; SKX-NEXT: rclb $7, %dil # sched: [3:0.75] -; SKX-NEXT: rcrb $7, %dil # sched: [3:0.75] -; SKX-NEXT: rclb $7, (%rdx) # sched: [8:0.75] -; SKX-NEXT: rcrb $7, (%rdx) # sched: [8:0.75] -; SKX-NEXT: rclb %cl, %dil # sched: [11:2.50] -; SKX-NEXT: rcrb %cl, %dil # sched: [14:2.50] -; SKX-NEXT: rclb %cl, (%rdx) # sched: [15:2.50] -; SKX-NEXT: rcrb %cl, (%rdx) # sched: [18:2.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_rcl_rcr_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: rclb %dil # sched: [1:0.50] -; BDVER2-NEXT: rcrb %dil # sched: [1:0.50] -; BDVER2-NEXT: rclb (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rcrb (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rclb $7, %dil # sched: [13:0.50] -; BDVER2-NEXT: rcrb $7, %dil # sched: [12:0.50] -; BDVER2-NEXT: rclb $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rcrb $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rclb %cl, %dil # sched: [12:0.50] -; BDVER2-NEXT: rcrb %cl, %dil # sched: [11:0.50] -; BDVER2-NEXT: rclb %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rcrb %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_rcl_rcr_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: rclb %dil # sched: [1:0.50] -; BTVER2-NEXT: rcrb %dil # sched: [1:0.50] -; BTVER2-NEXT: rclb (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rcrb (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rclb $7, %dil # sched: [1:0.50] -; BTVER2-NEXT: rcrb $7, %dil # sched: [1:0.50] -; BTVER2-NEXT: rclb $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rcrb $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rclb %cl, %dil # sched: [1:0.50] -; BTVER2-NEXT: rcrb %cl, %dil # sched: [1:0.50] -; BTVER2-NEXT: rclb %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rcrb %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_rcl_rcr_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: rclb %dil # sched: [1:0.25] -; ZNVER1-NEXT: rcrb %dil # sched: [1:0.25] -; ZNVER1-NEXT: rclb (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rcrb (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rclb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: rcrb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: rclb $7, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rcrb $7, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rclb %cl, %dil # sched: [1:0.25] -; ZNVER1-NEXT: rcrb %cl, %dil # sched: [1:0.25] -; ZNVER1-NEXT: rclb %cl, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rcrb %cl, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "rclb $0 \0A\09 rcrb $0 \0A\09 rclb $2 \0A\09 rcrb $2 \0A\09 rclb $3, $0 \0A\09 rcrb $3, $0 \0A\09 rclb $3, $2 \0A\09 rcrb $3, $2 \0A\09 rclb %CL, $0 \0A\09 rcrb %CL, $0 \0A\09 rclb %CL, $2 \0A\09 rcrb %CL, $2", "r,r,*m,i"(i8 %a0, i8 %a1, i8 *%a2, i8 7) - ret void -} -define void @test_rcl_rcr_16(i16 %a0, i16 %a1, i16 *%a2) optsize { -; GENERIC-LABEL: test_rcl_rcr_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: rclw %di # sched: [2:1.50] -; GENERIC-NEXT: rcrw %di # sched: [2:1.50] -; GENERIC-NEXT: rclw (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rcrw (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rclw $7, %di # sched: [5:4.00] -; GENERIC-NEXT: rcrw $7, %di # sched: [5:4.00] -; GENERIC-NEXT: rclw $7, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rcrw $7, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rclw %cl, %di # sched: [5:4.00] -; GENERIC-NEXT: rcrw %cl, %di # sched: [5:4.00] -; GENERIC-NEXT: rclw %cl, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rcrw %cl, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_rcl_rcr_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: rclw %di # sched: [1:1.00] -; ATOM-NEXT: rcrw %di # sched: [1:1.00] -; ATOM-NEXT: rclw (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rcrw (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rclw $7, %di # sched: [1:1.00] -; ATOM-NEXT: rcrw $7, %di # sched: [1:1.00] -; ATOM-NEXT: rclw $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rcrw $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rclw %cl, %di # sched: [1:1.00] -; ATOM-NEXT: rcrw %cl, %di # sched: [1:1.00] -; ATOM-NEXT: rclw %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rcrw %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_rcl_rcr_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: rclw %di # sched: [1:1.00] -; SLM-NEXT: rcrw %di # sched: [1:1.00] -; SLM-NEXT: rclw (%rdx) # sched: [4:2.00] -; SLM-NEXT: rcrw (%rdx) # sched: [4:2.00] -; SLM-NEXT: rclw $7, %di # sched: [1:1.00] -; SLM-NEXT: rcrw $7, %di # sched: [1:1.00] -; SLM-NEXT: rclw $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rcrw $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rclw %cl, %di # sched: [1:1.00] -; SLM-NEXT: rcrw %cl, %di # sched: [1:1.00] -; SLM-NEXT: rclw %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rcrw %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_rcl_rcr_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: rclw %di # sched: [2:1.50] -; SANDY-NEXT: rcrw %di # sched: [2:1.50] -; SANDY-NEXT: rclw (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rcrw (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rclw $7, %di # sched: [5:4.00] -; SANDY-NEXT: rcrw $7, %di # sched: [5:4.00] -; SANDY-NEXT: rclw $7, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rcrw $7, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rclw %cl, %di # sched: [5:4.00] -; SANDY-NEXT: rcrw %cl, %di # sched: [5:4.00] -; SANDY-NEXT: rclw %cl, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rcrw %cl, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rcl_rcr_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: rclw %di # sched: [3:0.75] -; HASWELL-NEXT: rcrw %di # sched: [3:0.75] -; HASWELL-NEXT: rclw (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rcrw (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rclw $7, %di # sched: [3:0.75] -; HASWELL-NEXT: rcrw $7, %di # sched: [3:0.75] -; HASWELL-NEXT: rclw $7, (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rcrw $7, (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rclw %cl, %di # sched: [11:2.00] -; HASWELL-NEXT: rcrw %cl, %di # sched: [11:2.00] -; HASWELL-NEXT: rclw %cl, (%rdx) # sched: [16:2.00] -; HASWELL-NEXT: rcrw %cl, (%rdx) # sched: [19:2.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rcl_rcr_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: rclw %di # sched: [3:0.75] -; BROADWELL-NEXT: rcrw %di # sched: [3:0.75] -; BROADWELL-NEXT: rclw (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rcrw (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rclw $7, %di # sched: [3:0.75] -; BROADWELL-NEXT: rcrw $7, %di # sched: [3:0.75] -; BROADWELL-NEXT: rclw $7, (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rcrw $7, (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rclw %cl, %di # sched: [11:2.00] -; BROADWELL-NEXT: rcrw %cl, %di # sched: [11:2.00] -; BROADWELL-NEXT: rclw %cl, (%rdx) # sched: [15:2.00] -; BROADWELL-NEXT: rcrw %cl, (%rdx) # sched: [18:2.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rcl_rcr_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: rclw %di # sched: [3:0.75] -; SKYLAKE-NEXT: rcrw %di # sched: [3:0.75] -; SKYLAKE-NEXT: rclw (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rcrw (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rclw $7, %di # sched: [3:0.75] -; SKYLAKE-NEXT: rcrw $7, %di # sched: [3:0.75] -; SKYLAKE-NEXT: rclw $7, (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rcrw $7, (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rclw %cl, %di # sched: [11:2.00] -; SKYLAKE-NEXT: rcrw %cl, %di # sched: [11:2.00] -; SKYLAKE-NEXT: rclw %cl, (%rdx) # sched: [15:2.50] -; SKYLAKE-NEXT: rcrw %cl, (%rdx) # sched: [18:2.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rcl_rcr_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: rclw %di # sched: [3:0.75] -; SKX-NEXT: rcrw %di # sched: [3:0.75] -; SKX-NEXT: rclw (%rdx) # sched: [8:0.75] -; SKX-NEXT: rcrw (%rdx) # sched: [8:0.75] -; SKX-NEXT: rclw $7, %di # sched: [3:0.75] -; SKX-NEXT: rcrw $7, %di # sched: [3:0.75] -; SKX-NEXT: rclw $7, (%rdx) # sched: [8:0.75] -; SKX-NEXT: rcrw $7, (%rdx) # sched: [8:0.75] -; SKX-NEXT: rclw %cl, %di # sched: [11:2.00] -; SKX-NEXT: rcrw %cl, %di # sched: [11:2.00] -; SKX-NEXT: rclw %cl, (%rdx) # sched: [15:2.50] -; SKX-NEXT: rcrw %cl, (%rdx) # sched: [18:2.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_rcl_rcr_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: rclw %di # sched: [1:0.50] -; BDVER2-NEXT: rcrw %di # sched: [1:0.50] -; BDVER2-NEXT: rclw (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rcrw (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rclw $7, %di # sched: [11:0.50] -; BDVER2-NEXT: rcrw $7, %di # sched: [10:0.50] -; BDVER2-NEXT: rclw $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rcrw $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rclw %cl, %di # sched: [10:0.50] -; BDVER2-NEXT: rcrw %cl, %di # sched: [9:0.50] -; BDVER2-NEXT: rclw %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rcrw %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_rcl_rcr_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: rclw %di # sched: [1:0.50] -; BTVER2-NEXT: rcrw %di # sched: [1:0.50] -; BTVER2-NEXT: rclw (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rcrw (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rclw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: rcrw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: rclw $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rcrw $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rclw %cl, %di # sched: [1:0.50] -; BTVER2-NEXT: rcrw %cl, %di # sched: [1:0.50] -; BTVER2-NEXT: rclw %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rcrw %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_rcl_rcr_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: rclw %di # sched: [1:0.25] -; ZNVER1-NEXT: rcrw %di # sched: [1:0.25] -; ZNVER1-NEXT: rclw (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rcrw (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rclw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: rcrw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: rclw $7, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rcrw $7, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rclw %cl, %di # sched: [1:0.25] -; ZNVER1-NEXT: rcrw %cl, %di # sched: [1:0.25] -; ZNVER1-NEXT: rclw %cl, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rcrw %cl, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "rclw $0 \0A\09 rcrw $0 \0A\09 rclw $2 \0A\09 rcrw $2 \0A\09 rclw $3, $0 \0A\09 rcrw $3, $0 \0A\09 rclw $3, $2 \0A\09 rcrw $3, $2 \0A\09 rclw %CL, $0 \0A\09 rcrw %CL, $0 \0A\09 rclw %CL, $2 \0A\09 rcrw %CL, $2", "r,r,*m,i"(i16 %a0, i16 %a1, i16 *%a2, i8 7) - ret void -} -define void @test_rcl_rcr_32(i32 %a0, i32 %a1, i32 *%a2) optsize { -; GENERIC-LABEL: test_rcl_rcr_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: rcll %edi # sched: [2:1.50] -; GENERIC-NEXT: rcrl %edi # sched: [2:1.50] -; GENERIC-NEXT: rcll (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rcrl (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rcll $7, %edi # sched: [5:4.00] -; GENERIC-NEXT: rcrl $7, %edi # sched: [5:4.00] -; GENERIC-NEXT: rcll $7, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rcrl $7, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rcll %cl, %edi # sched: [5:4.00] -; GENERIC-NEXT: rcrl %cl, %edi # sched: [5:4.00] -; GENERIC-NEXT: rcll %cl, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rcrl %cl, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_rcl_rcr_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: rcll %edi # sched: [1:1.00] -; ATOM-NEXT: rcrl %edi # sched: [1:1.00] -; ATOM-NEXT: rcll (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rcrl (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rcll $7, %edi # sched: [1:1.00] -; ATOM-NEXT: rcrl $7, %edi # sched: [1:1.00] -; ATOM-NEXT: rcll $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rcrl $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rcll %cl, %edi # sched: [1:1.00] -; ATOM-NEXT: rcrl %cl, %edi # sched: [1:1.00] -; ATOM-NEXT: rcll %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rcrl %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_rcl_rcr_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: rcll %edi # sched: [1:1.00] -; SLM-NEXT: rcrl %edi # sched: [1:1.00] -; SLM-NEXT: rcll (%rdx) # sched: [4:2.00] -; SLM-NEXT: rcrl (%rdx) # sched: [4:2.00] -; SLM-NEXT: rcll $7, %edi # sched: [1:1.00] -; SLM-NEXT: rcrl $7, %edi # sched: [1:1.00] -; SLM-NEXT: rcll $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rcrl $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rcll %cl, %edi # sched: [1:1.00] -; SLM-NEXT: rcrl %cl, %edi # sched: [1:1.00] -; SLM-NEXT: rcll %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rcrl %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_rcl_rcr_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: rcll %edi # sched: [2:1.50] -; SANDY-NEXT: rcrl %edi # sched: [2:1.50] -; SANDY-NEXT: rcll (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rcrl (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rcll $7, %edi # sched: [5:4.00] -; SANDY-NEXT: rcrl $7, %edi # sched: [5:4.00] -; SANDY-NEXT: rcll $7, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rcrl $7, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rcll %cl, %edi # sched: [5:4.00] -; SANDY-NEXT: rcrl %cl, %edi # sched: [5:4.00] -; SANDY-NEXT: rcll %cl, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rcrl %cl, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rcl_rcr_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: rcll %edi # sched: [3:0.75] -; HASWELL-NEXT: rcrl %edi # sched: [3:0.75] -; HASWELL-NEXT: rcll (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rcrl (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rcll $7, %edi # sched: [3:0.75] -; HASWELL-NEXT: rcrl $7, %edi # sched: [3:0.75] -; HASWELL-NEXT: rcll $7, (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rcrl $7, (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rcll %cl, %edi # sched: [11:2.00] -; HASWELL-NEXT: rcrl %cl, %edi # sched: [11:2.00] -; HASWELL-NEXT: rcll %cl, (%rdx) # sched: [16:2.00] -; HASWELL-NEXT: rcrl %cl, (%rdx) # sched: [19:2.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rcl_rcr_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: rcll %edi # sched: [3:0.75] -; BROADWELL-NEXT: rcrl %edi # sched: [3:0.75] -; BROADWELL-NEXT: rcll (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rcrl (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rcll $7, %edi # sched: [3:0.75] -; BROADWELL-NEXT: rcrl $7, %edi # sched: [3:0.75] -; BROADWELL-NEXT: rcll $7, (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rcrl $7, (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rcll %cl, %edi # sched: [11:2.00] -; BROADWELL-NEXT: rcrl %cl, %edi # sched: [11:2.00] -; BROADWELL-NEXT: rcll %cl, (%rdx) # sched: [15:2.00] -; BROADWELL-NEXT: rcrl %cl, (%rdx) # sched: [18:2.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rcl_rcr_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: rcll %edi # sched: [3:0.75] -; SKYLAKE-NEXT: rcrl %edi # sched: [3:0.75] -; SKYLAKE-NEXT: rcll (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rcrl (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rcll $7, %edi # sched: [3:0.75] -; SKYLAKE-NEXT: rcrl $7, %edi # sched: [3:0.75] -; SKYLAKE-NEXT: rcll $7, (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rcrl $7, (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rcll %cl, %edi # sched: [11:2.00] -; SKYLAKE-NEXT: rcrl %cl, %edi # sched: [11:2.00] -; SKYLAKE-NEXT: rcll %cl, (%rdx) # sched: [15:2.50] -; SKYLAKE-NEXT: rcrl %cl, (%rdx) # sched: [18:2.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rcl_rcr_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: rcll %edi # sched: [3:0.75] -; SKX-NEXT: rcrl %edi # sched: [3:0.75] -; SKX-NEXT: rcll (%rdx) # sched: [8:0.75] -; SKX-NEXT: rcrl (%rdx) # sched: [8:0.75] -; SKX-NEXT: rcll $7, %edi # sched: [3:0.75] -; SKX-NEXT: rcrl $7, %edi # sched: [3:0.75] -; SKX-NEXT: rcll $7, (%rdx) # sched: [8:0.75] -; SKX-NEXT: rcrl $7, (%rdx) # sched: [8:0.75] -; SKX-NEXT: rcll %cl, %edi # sched: [11:2.00] -; SKX-NEXT: rcrl %cl, %edi # sched: [11:2.00] -; SKX-NEXT: rcll %cl, (%rdx) # sched: [15:2.50] -; SKX-NEXT: rcrl %cl, (%rdx) # sched: [18:2.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_rcl_rcr_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: rcll %edi # sched: [1:0.50] -; BDVER2-NEXT: rcrl %edi # sched: [1:0.50] -; BDVER2-NEXT: rcll (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rcrl (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rcll $7, %edi # sched: [8:0.50] -; BDVER2-NEXT: rcrl $7, %edi # sched: [7:0.50] -; BDVER2-NEXT: rcll $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rcrl $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rcll %cl, %edi # sched: [7:0.50] -; BDVER2-NEXT: rcrl %cl, %edi # sched: [7:0.50] -; BDVER2-NEXT: rcll %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rcrl %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_rcl_rcr_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: rcll %edi # sched: [1:0.50] -; BTVER2-NEXT: rcrl %edi # sched: [1:0.50] -; BTVER2-NEXT: rcll (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rcrl (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rcll $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: rcrl $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: rcll $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rcrl $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rcll %cl, %edi # sched: [1:0.50] -; BTVER2-NEXT: rcrl %cl, %edi # sched: [1:0.50] -; BTVER2-NEXT: rcll %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rcrl %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_rcl_rcr_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: rcll %edi # sched: [1:0.25] -; ZNVER1-NEXT: rcrl %edi # sched: [1:0.25] -; ZNVER1-NEXT: rcll (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rcrl (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rcll $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: rcrl $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: rcll $7, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rcrl $7, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rcll %cl, %edi # sched: [1:0.25] -; ZNVER1-NEXT: rcrl %cl, %edi # sched: [1:0.25] -; ZNVER1-NEXT: rcll %cl, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rcrl %cl, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "rcll $0 \0A\09 rcrl $0 \0A\09 rcll $2 \0A\09 rcrl $2 \0A\09 rcll $3, $0 \0A\09 rcrl $3, $0 \0A\09 rcll $3, $2 \0A\09 rcrl $3, $2 \0A\09 rcll %CL, $0 \0A\09 rcrl %CL, $0 \0A\09 rcll %CL, $2 \0A\09 rcrl %CL, $2", "r,r,*m,i"(i32 %a0, i32 %a1, i32 *%a2, i8 7) - ret void -} -define void @test_rcl_rcr_64(i64 %a0, i64 %a1, i64 *%a2) optsize { -; GENERIC-LABEL: test_rcl_rcr_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: rclq %rdi # sched: [2:1.50] -; GENERIC-NEXT: rcrq %rdi # sched: [2:1.50] -; GENERIC-NEXT: rclq (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rcrq (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rclq $7, %rdi # sched: [5:4.00] -; GENERIC-NEXT: rcrq $7, %rdi # sched: [5:4.00] -; GENERIC-NEXT: rclq $7, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rcrq $7, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rclq %cl, %rdi # sched: [5:4.00] -; GENERIC-NEXT: rcrq %cl, %rdi # sched: [5:4.00] -; GENERIC-NEXT: rclq %cl, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: rcrq %cl, (%rdx) # sched: [11:3.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_rcl_rcr_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: rclq %rdi # sched: [1:1.00] -; ATOM-NEXT: rcrq %rdi # sched: [1:1.00] -; ATOM-NEXT: rclq (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rcrq (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rclq $7, %rdi # sched: [1:1.00] -; ATOM-NEXT: rcrq $7, %rdi # sched: [1:1.00] -; ATOM-NEXT: rclq $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rcrq $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rclq %cl, %rdi # sched: [1:1.00] -; ATOM-NEXT: rcrq %cl, %rdi # sched: [1:1.00] -; ATOM-NEXT: rclq %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rcrq %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_rcl_rcr_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: rclq %rdi # sched: [1:1.00] -; SLM-NEXT: rcrq %rdi # sched: [1:1.00] -; SLM-NEXT: rclq (%rdx) # sched: [4:2.00] -; SLM-NEXT: rcrq (%rdx) # sched: [4:2.00] -; SLM-NEXT: rclq $7, %rdi # sched: [1:1.00] -; SLM-NEXT: rcrq $7, %rdi # sched: [1:1.00] -; SLM-NEXT: rclq $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rcrq $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rclq %cl, %rdi # sched: [1:1.00] -; SLM-NEXT: rcrq %cl, %rdi # sched: [1:1.00] -; SLM-NEXT: rclq %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rcrq %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_rcl_rcr_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: rclq %rdi # sched: [2:1.50] -; SANDY-NEXT: rcrq %rdi # sched: [2:1.50] -; SANDY-NEXT: rclq (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rcrq (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rclq $7, %rdi # sched: [5:4.00] -; SANDY-NEXT: rcrq $7, %rdi # sched: [5:4.00] -; SANDY-NEXT: rclq $7, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rcrq $7, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rclq %cl, %rdi # sched: [5:4.00] -; SANDY-NEXT: rcrq %cl, %rdi # sched: [5:4.00] -; SANDY-NEXT: rclq %cl, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: rcrq %cl, (%rdx) # sched: [11:3.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rcl_rcr_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: rclq %rdi # sched: [3:0.75] -; HASWELL-NEXT: rcrq %rdi # sched: [3:0.75] -; HASWELL-NEXT: rclq (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rcrq (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rclq $7, %rdi # sched: [3:0.75] -; HASWELL-NEXT: rcrq $7, %rdi # sched: [3:0.75] -; HASWELL-NEXT: rclq $7, (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rcrq $7, (%rdx) # sched: [9:0.75] -; HASWELL-NEXT: rclq %cl, %rdi # sched: [11:2.00] -; HASWELL-NEXT: rcrq %cl, %rdi # sched: [11:2.00] -; HASWELL-NEXT: rclq %cl, (%rdx) # sched: [16:2.00] -; HASWELL-NEXT: rcrq %cl, (%rdx) # sched: [19:2.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rcl_rcr_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: rclq %rdi # sched: [3:0.75] -; BROADWELL-NEXT: rcrq %rdi # sched: [3:0.75] -; BROADWELL-NEXT: rclq (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rcrq (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rclq $7, %rdi # sched: [3:0.75] -; BROADWELL-NEXT: rcrq $7, %rdi # sched: [3:0.75] -; BROADWELL-NEXT: rclq $7, (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rcrq $7, (%rdx) # sched: [8:0.75] -; BROADWELL-NEXT: rclq %cl, %rdi # sched: [11:2.00] -; BROADWELL-NEXT: rcrq %cl, %rdi # sched: [11:2.00] -; BROADWELL-NEXT: rclq %cl, (%rdx) # sched: [15:2.00] -; BROADWELL-NEXT: rcrq %cl, (%rdx) # sched: [18:2.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rcl_rcr_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: rclq %rdi # sched: [3:0.75] -; SKYLAKE-NEXT: rcrq %rdi # sched: [3:0.75] -; SKYLAKE-NEXT: rclq (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rcrq (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rclq $7, %rdi # sched: [3:0.75] -; SKYLAKE-NEXT: rcrq $7, %rdi # sched: [3:0.75] -; SKYLAKE-NEXT: rclq $7, (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rcrq $7, (%rdx) # sched: [8:0.75] -; SKYLAKE-NEXT: rclq %cl, %rdi # sched: [11:2.00] -; SKYLAKE-NEXT: rcrq %cl, %rdi # sched: [11:2.00] -; SKYLAKE-NEXT: rclq %cl, (%rdx) # sched: [15:2.50] -; SKYLAKE-NEXT: rcrq %cl, (%rdx) # sched: [18:2.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rcl_rcr_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: rclq %rdi # sched: [3:0.75] -; SKX-NEXT: rcrq %rdi # sched: [3:0.75] -; SKX-NEXT: rclq (%rdx) # sched: [8:0.75] -; SKX-NEXT: rcrq (%rdx) # sched: [8:0.75] -; SKX-NEXT: rclq $7, %rdi # sched: [3:0.75] -; SKX-NEXT: rcrq $7, %rdi # sched: [3:0.75] -; SKX-NEXT: rclq $7, (%rdx) # sched: [8:0.75] -; SKX-NEXT: rcrq $7, (%rdx) # sched: [8:0.75] -; SKX-NEXT: rclq %cl, %rdi # sched: [11:2.00] -; SKX-NEXT: rcrq %cl, %rdi # sched: [11:2.00] -; SKX-NEXT: rclq %cl, (%rdx) # sched: [15:2.50] -; SKX-NEXT: rcrq %cl, (%rdx) # sched: [18:2.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_rcl_rcr_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: rclq %rdi # sched: [1:0.50] -; BDVER2-NEXT: rcrq %rdi # sched: [1:0.50] -; BDVER2-NEXT: rclq (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rcrq (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rclq $7, %rdi # sched: [8:0.50] -; BDVER2-NEXT: rcrq $7, %rdi # sched: [7:0.50] -; BDVER2-NEXT: rclq $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rcrq $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rclq %cl, %rdi # sched: [7:0.50] -; BDVER2-NEXT: rcrq %cl, %rdi # sched: [7:0.50] -; BDVER2-NEXT: rclq %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rcrq %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_rcl_rcr_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: rclq %rdi # sched: [1:0.50] -; BTVER2-NEXT: rcrq %rdi # sched: [1:0.50] -; BTVER2-NEXT: rclq (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rcrq (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rclq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: rcrq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: rclq $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rcrq $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rclq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: rcrq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: rclq %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rcrq %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_rcl_rcr_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: rclq %rdi # sched: [1:0.25] -; ZNVER1-NEXT: rcrq %rdi # sched: [1:0.25] -; ZNVER1-NEXT: rclq (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rcrq (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rclq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: rcrq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: rclq $7, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rcrq $7, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rclq %cl, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: rcrq %cl, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: rclq %cl, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: rcrq %cl, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "rclq $0 \0A\09 rcrq $0 \0A\09 rclq $2 \0A\09 rcrq $2 \0A\09 rclq $3, $0 \0A\09 rcrq $3, $0 \0A\09 rclq $3, $2 \0A\09 rcrq $3, $2 \0A\09 rclq %CL, $0 \0A\09 rcrq %CL, $0 \0A\09 rclq %CL, $2 \0A\09 rcrq %CL, $2", "r,r,*m,i"(i64 %a0, i64 %a1, i64 *%a2, i8 7) - ret void -} - -define void @test_rdmsr_wrmsr() optsize { -; GENERIC-LABEL: test_rdmsr_wrmsr: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: rdmsr # sched: [100:0.33] -; GENERIC-NEXT: wrmsr # sched: [100:0.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_rdmsr_wrmsr: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: rdmsr # sched: [78:39.00] -; ATOM-NEXT: wrmsr # sched: [202:101.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_rdmsr_wrmsr: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: rdmsr # sched: [100:1.00] -; SLM-NEXT: wrmsr # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_rdmsr_wrmsr: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: rdmsr # sched: [100:0.33] -; SANDY-NEXT: wrmsr # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rdmsr_wrmsr: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: rdmsr # sched: [100:0.25] -; HASWELL-NEXT: wrmsr # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rdmsr_wrmsr: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: rdmsr # sched: [100:0.25] -; BROADWELL-NEXT: wrmsr # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rdmsr_wrmsr: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: rdmsr # sched: [100:0.25] -; SKYLAKE-NEXT: wrmsr # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rdmsr_wrmsr: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: rdmsr # sched: [100:0.25] -; SKX-NEXT: wrmsr # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_rdmsr_wrmsr: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: rdmsr # sched: [100:0.50] -; BDVER2-NEXT: wrmsr # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_rdmsr_wrmsr: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: rdmsr # sched: [100:0.50] -; BTVER2-NEXT: wrmsr # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_rdmsr_wrmsr: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: rdmsr # sched: [100:0.25] -; ZNVER1-NEXT: wrmsr # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "rdmsr \0A\09 wrmsr", ""() - ret void -} - -define void @test_rdpmc() optsize { -; GENERIC-LABEL: test_rdpmc: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: rdpmc # sched: [100:0.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_rdpmc: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: rdpmc # sched: [46:23.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_rdpmc: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: rdpmc # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_rdpmc: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: rdpmc # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rdpmc: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: rdpmc # sched: [1:8.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rdpmc: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: rdpmc # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rdpmc: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: rdpmc # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rdpmc: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: rdpmc # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_rdpmc: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: rdpmc # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_rdpmc: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: rdpmc # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_rdpmc: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: rdpmc # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "rdpmc", ""() - ret void -} - -define void @test_rdtsc_rdtscp() optsize { -; GENERIC-LABEL: test_rdtsc_rdtscp: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: rdtsc # sched: [100:0.33] -; GENERIC-NEXT: rdtscp # sched: [100:0.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_rdtsc_rdtscp: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: rdtsc # sched: [30:15.00] -; ATOM-NEXT: rdtscp # sched: [30:15.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_rdtsc_rdtscp: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: rdtsc # sched: [100:1.00] -; SLM-NEXT: rdtscp # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_rdtsc_rdtscp: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: rdtsc # sched: [100:0.33] -; SANDY-NEXT: rdtscp # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rdtsc_rdtscp: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: rdtsc # sched: [18:2.00] -; HASWELL-NEXT: rdtscp # sched: [42:5.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rdtsc_rdtscp: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: rdtsc # sched: [18:2.00] -; BROADWELL-NEXT: rdtscp # sched: [42:5.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rdtsc_rdtscp: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: rdtsc # sched: [18:2.00] -; SKYLAKE-NEXT: rdtscp # sched: [42:5.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rdtsc_rdtscp: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: rdtsc # sched: [18:2.00] -; SKX-NEXT: rdtscp # sched: [42:5.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_rdtsc_rdtscp: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: rdtsc # sched: [100:0.50] -; BDVER2-NEXT: rdtscp # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_rdtsc_rdtscp: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: rdtsc # sched: [100:0.50] -; BTVER2-NEXT: rdtscp # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_rdtsc_rdtscp: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: rdtsc # sched: [100:0.25] -; ZNVER1-NEXT: rdtscp # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "rdtsc \0A\09 rdtscp", ""() - ret void -} - -define void @test_ret() optsize { -; GENERIC-LABEL: test_ret: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; GENERIC-NEXT: retq $4095 # imm = 0xFFF -; GENERIC-NEXT: # sched: [6:1.00] -; GENERIC-NEXT: lretl # sched: [6:1.00] -; GENERIC-NEXT: lretl $4095 # imm = 0xFFF -; GENERIC-NEXT: # sched: [6:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_ret: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: retq # sched: [79:39.50] -; ATOM-NEXT: retq $4095 # imm = 0xFFF -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: lretl # sched: [79:39.50] -; ATOM-NEXT: lretl $4095 # imm = 0xFFF -; ATOM-NEXT: # sched: [79:39.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_ret: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: retq # sched: [4:1.00] -; SLM-NEXT: retq $4095 # imm = 0xFFF -; SLM-NEXT: # sched: [4:1.00] -; SLM-NEXT: lretl # sched: [4:1.00] -; SLM-NEXT: lretl $4095 # imm = 0xFFF -; SLM-NEXT: # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_ret: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: retq # sched: [1:1.00] -; SANDY-NEXT: retq $4095 # imm = 0xFFF -; SANDY-NEXT: # sched: [6:1.00] -; SANDY-NEXT: lretl # sched: [6:1.00] -; SANDY-NEXT: lretl $4095 # imm = 0xFFF -; SANDY-NEXT: # sched: [6:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_ret: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; HASWELL-NEXT: retq $4095 # imm = 0xFFF -; HASWELL-NEXT: # sched: [1:2.00] -; HASWELL-NEXT: lretl # sched: [6:0.50] -; HASWELL-NEXT: lretl $4095 # imm = 0xFFF -; HASWELL-NEXT: # sched: [1:2.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ret: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; BROADWELL-NEXT: retq $4095 # imm = 0xFFF -; BROADWELL-NEXT: # sched: [6:0.50] -; BROADWELL-NEXT: lretl # sched: [6:0.50] -; BROADWELL-NEXT: lretl $4095 # imm = 0xFFF -; BROADWELL-NEXT: # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_ret: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; SKYLAKE-NEXT: retq $4095 # imm = 0xFFF -; SKYLAKE-NEXT: # sched: [6:0.50] -; SKYLAKE-NEXT: lretl # sched: [6:0.50] -; SKYLAKE-NEXT: lretl $4095 # imm = 0xFFF -; SKYLAKE-NEXT: # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_ret: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: retq # sched: [7:1.00] -; SKX-NEXT: retq $4095 # imm = 0xFFF -; SKX-NEXT: # sched: [6:0.50] -; SKX-NEXT: lretl # sched: [6:0.50] -; SKX-NEXT: lretl $4095 # imm = 0xFFF -; SKX-NEXT: # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_ret: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; BDVER2-NEXT: retq $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [5:1.00] -; BDVER2-NEXT: lretl # sched: [5:1.00] -; BDVER2-NEXT: lretl $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_ret: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; BTVER2-NEXT: retq $4095 # imm = 0xFFF -; BTVER2-NEXT: # sched: [4:1.00] -; BTVER2-NEXT: lretl # sched: [4:1.00] -; BTVER2-NEXT: lretl $4095 # imm = 0xFFF -; BTVER2-NEXT: # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ret: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: retq # sched: [1:0.50] -; ZNVER1-NEXT: retq $4095 # imm = 0xFFF -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: lretl # sched: [1:0.50] -; ZNVER1-NEXT: lretl $4095 # imm = 0xFFF -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "ret \0A\09 ret $0 \0A\09 lret \0A\09 lret $0", "i"(i16 4095) - ret void -} - -define void @test_rol_ror_8(i8 %a0, i8 %a1, i8 *%a2) optsize { -; GENERIC-LABEL: test_rol_ror_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: rolb %dil # sched: [2:1.00] -; GENERIC-NEXT: rorb %dil # sched: [2:1.00] -; GENERIC-NEXT: rolb (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: rorb (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: rolb $7, %dil # sched: [2:1.00] -; GENERIC-NEXT: rorb $7, %dil # sched: [2:1.00] -; GENERIC-NEXT: rolb $7, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: rorb $7, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: rolb %cl, %dil # sched: [3:1.50] -; GENERIC-NEXT: rorb %cl, %dil # sched: [3:1.50] -; GENERIC-NEXT: rolb %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: rorb %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_rol_ror_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: rolb %dil # sched: [1:1.00] -; ATOM-NEXT: rorb %dil # sched: [1:1.00] -; ATOM-NEXT: rolb (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rorb (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rolb $7, %dil # sched: [1:1.00] -; ATOM-NEXT: rorb $7, %dil # sched: [1:1.00] -; ATOM-NEXT: rolb $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rorb $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rolb %cl, %dil # sched: [1:1.00] -; ATOM-NEXT: rorb %cl, %dil # sched: [1:1.00] -; ATOM-NEXT: rolb %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rorb %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_rol_ror_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: rolb %dil # sched: [1:1.00] -; SLM-NEXT: rorb %dil # sched: [1:1.00] -; SLM-NEXT: rolb (%rdx) # sched: [4:2.00] -; SLM-NEXT: rorb (%rdx) # sched: [4:2.00] -; SLM-NEXT: rolb $7, %dil # sched: [1:1.00] -; SLM-NEXT: rorb $7, %dil # sched: [1:1.00] -; SLM-NEXT: rolb $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rorb $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rolb %cl, %dil # sched: [1:1.00] -; SLM-NEXT: rorb %cl, %dil # sched: [1:1.00] -; SLM-NEXT: rolb %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rorb %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_rol_ror_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: rolb %dil # sched: [2:1.00] -; SANDY-NEXT: rorb %dil # sched: [2:1.00] -; SANDY-NEXT: rolb (%rdx) # sched: [8:1.00] -; SANDY-NEXT: rorb (%rdx) # sched: [8:1.00] -; SANDY-NEXT: rolb $7, %dil # sched: [2:1.00] -; SANDY-NEXT: rorb $7, %dil # sched: [2:1.00] -; SANDY-NEXT: rolb $7, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: rorb $7, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: rolb %cl, %dil # sched: [3:1.50] -; SANDY-NEXT: rorb %cl, %dil # sched: [3:1.50] -; SANDY-NEXT: rolb %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: rorb %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rol_ror_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: rolb %dil # sched: [2:1.00] -; HASWELL-NEXT: rorb %dil # sched: [2:1.00] -; HASWELL-NEXT: rolb (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: rorb (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: rolb $7, %dil # sched: [2:1.00] -; HASWELL-NEXT: rorb $7, %dil # sched: [2:1.00] -; HASWELL-NEXT: rolb $7, (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: rorb $7, (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: rolb %cl, %dil # sched: [3:1.00] -; HASWELL-NEXT: rorb %cl, %dil # sched: [3:1.00] -; HASWELL-NEXT: rolb %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: rorb %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rol_ror_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: rolb %dil # sched: [2:1.00] -; BROADWELL-NEXT: rorb %dil # sched: [2:1.00] -; BROADWELL-NEXT: rolb (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: rorb (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: rolb $7, %dil # sched: [2:1.00] -; BROADWELL-NEXT: rorb $7, %dil # sched: [2:1.00] -; BROADWELL-NEXT: rolb $7, (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: rorb $7, (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: rolb %cl, %dil # sched: [3:1.00] -; BROADWELL-NEXT: rorb %cl, %dil # sched: [3:1.00] -; BROADWELL-NEXT: rolb %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: rorb %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rol_ror_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: rolb %dil # sched: [2:1.00] -; SKYLAKE-NEXT: rorb %dil # sched: [2:1.00] -; SKYLAKE-NEXT: rolb (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: rorb (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: rolb $7, %dil # sched: [2:1.00] -; SKYLAKE-NEXT: rorb $7, %dil # sched: [2:1.00] -; SKYLAKE-NEXT: rolb $7, (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: rorb $7, (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: rolb %cl, %dil # sched: [3:1.50] -; SKYLAKE-NEXT: rorb %cl, %dil # sched: [3:1.50] -; SKYLAKE-NEXT: rolb %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: rorb %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rol_ror_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: rolb %dil # sched: [2:1.00] -; SKX-NEXT: rorb %dil # sched: [2:1.00] -; SKX-NEXT: rolb (%rdx) # sched: [7:1.00] -; SKX-NEXT: rorb (%rdx) # sched: [7:1.00] -; SKX-NEXT: rolb $7, %dil # sched: [2:1.00] -; SKX-NEXT: rorb $7, %dil # sched: [2:1.00] -; SKX-NEXT: rolb $7, (%rdx) # sched: [7:1.00] -; SKX-NEXT: rorb $7, (%rdx) # sched: [7:1.00] -; SKX-NEXT: rolb %cl, %dil # sched: [3:1.50] -; SKX-NEXT: rorb %cl, %dil # sched: [3:1.50] -; SKX-NEXT: rolb %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: rorb %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_rol_ror_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: rolb %dil # sched: [1:0.50] -; BDVER2-NEXT: rorb %dil # sched: [1:0.50] -; BDVER2-NEXT: rolb (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rorb (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rolb $7, %dil # sched: [1:0.50] -; BDVER2-NEXT: rorb $7, %dil # sched: [1:0.50] -; BDVER2-NEXT: rolb $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rorb $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rolb %cl, %dil # sched: [1:0.50] -; BDVER2-NEXT: rorb %cl, %dil # sched: [1:0.50] -; BDVER2-NEXT: rolb %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rorb %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_rol_ror_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: rolb %dil # sched: [1:0.50] -; BTVER2-NEXT: rorb %dil # sched: [1:0.50] -; BTVER2-NEXT: rolb (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rorb (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rolb $7, %dil # sched: [1:0.50] -; BTVER2-NEXT: rorb $7, %dil # sched: [1:0.50] -; BTVER2-NEXT: rolb $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rorb $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rolb %cl, %dil # sched: [1:0.50] -; BTVER2-NEXT: rorb %cl, %dil # sched: [1:0.50] -; BTVER2-NEXT: rolb %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rorb %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_rol_ror_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: rolb %dil # sched: [1:0.25] -; ZNVER1-NEXT: rorb %dil # sched: [1:0.25] -; ZNVER1-NEXT: rolb (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rorb (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rolb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: rorb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: rolb $7, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rorb $7, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rolb %cl, %dil # sched: [1:0.25] -; ZNVER1-NEXT: rorb %cl, %dil # sched: [1:0.25] -; ZNVER1-NEXT: rolb %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rorb %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "rolb $0 \0A\09 rorb $0 \0A\09 rolb $2 \0A\09 rorb $2 \0A\09 rolb $3, $0 \0A\09 rorb $3, $0 \0A\09 rolb $3, $2 \0A\09 rorb $3, $2 \0A\09 rolb %CL, $0 \0A\09 rorb %CL, $0 \0A\09 rolb %CL, $2 \0A\09 rorb %CL, $2", "r,r,*m,i"(i8 %a0, i8 %a1, i8 *%a2, i8 7) - ret void -} -define void @test_rol_ror_16(i16 %a0, i16 %a1, i16 *%a2) optsize { -; GENERIC-LABEL: test_rol_ror_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: rolw %di # sched: [2:1.00] -; GENERIC-NEXT: rorw %di # sched: [2:1.00] -; GENERIC-NEXT: rolw (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: rorw (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: rolw $7, %di # sched: [2:1.00] -; GENERIC-NEXT: rorw $7, %di # sched: [2:1.00] -; GENERIC-NEXT: rolw $7, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: rorw $7, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: rolw %cl, %di # sched: [3:1.50] -; GENERIC-NEXT: rorw %cl, %di # sched: [3:1.50] -; GENERIC-NEXT: rolw %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: rorw %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_rol_ror_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: rolw %di # sched: [1:1.00] -; ATOM-NEXT: rorw %di # sched: [1:1.00] -; ATOM-NEXT: rolw (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rorw (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rolw $7, %di # sched: [1:1.00] -; ATOM-NEXT: rorw $7, %di # sched: [1:1.00] -; ATOM-NEXT: rolw $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rorw $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rolw %cl, %di # sched: [1:1.00] -; ATOM-NEXT: rorw %cl, %di # sched: [1:1.00] -; ATOM-NEXT: rolw %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rorw %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_rol_ror_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: rolw %di # sched: [1:1.00] -; SLM-NEXT: rorw %di # sched: [1:1.00] -; SLM-NEXT: rolw (%rdx) # sched: [4:2.00] -; SLM-NEXT: rorw (%rdx) # sched: [4:2.00] -; SLM-NEXT: rolw $7, %di # sched: [1:1.00] -; SLM-NEXT: rorw $7, %di # sched: [1:1.00] -; SLM-NEXT: rolw $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rorw $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rolw %cl, %di # sched: [1:1.00] -; SLM-NEXT: rorw %cl, %di # sched: [1:1.00] -; SLM-NEXT: rolw %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rorw %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_rol_ror_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: rolw %di # sched: [2:1.00] -; SANDY-NEXT: rorw %di # sched: [2:1.00] -; SANDY-NEXT: rolw (%rdx) # sched: [8:1.00] -; SANDY-NEXT: rorw (%rdx) # sched: [8:1.00] -; SANDY-NEXT: rolw $7, %di # sched: [2:1.00] -; SANDY-NEXT: rorw $7, %di # sched: [2:1.00] -; SANDY-NEXT: rolw $7, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: rorw $7, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: rolw %cl, %di # sched: [3:1.50] -; SANDY-NEXT: rorw %cl, %di # sched: [3:1.50] -; SANDY-NEXT: rolw %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: rorw %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rol_ror_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: rolw %di # sched: [2:1.00] -; HASWELL-NEXT: rorw %di # sched: [2:1.00] -; HASWELL-NEXT: rolw (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: rorw (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: rolw $7, %di # sched: [2:1.00] -; HASWELL-NEXT: rorw $7, %di # sched: [2:1.00] -; HASWELL-NEXT: rolw $7, (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: rorw $7, (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: rolw %cl, %di # sched: [3:1.00] -; HASWELL-NEXT: rorw %cl, %di # sched: [3:1.00] -; HASWELL-NEXT: rolw %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: rorw %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rol_ror_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: rolw %di # sched: [2:1.00] -; BROADWELL-NEXT: rorw %di # sched: [2:1.00] -; BROADWELL-NEXT: rolw (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: rorw (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: rolw $7, %di # sched: [2:1.00] -; BROADWELL-NEXT: rorw $7, %di # sched: [2:1.00] -; BROADWELL-NEXT: rolw $7, (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: rorw $7, (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: rolw %cl, %di # sched: [3:1.00] -; BROADWELL-NEXT: rorw %cl, %di # sched: [3:1.00] -; BROADWELL-NEXT: rolw %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: rorw %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rol_ror_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: rolw %di # sched: [2:1.00] -; SKYLAKE-NEXT: rorw %di # sched: [2:1.00] -; SKYLAKE-NEXT: rolw (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: rorw (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: rolw $7, %di # sched: [2:1.00] -; SKYLAKE-NEXT: rorw $7, %di # sched: [2:1.00] -; SKYLAKE-NEXT: rolw $7, (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: rorw $7, (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: rolw %cl, %di # sched: [3:1.50] -; SKYLAKE-NEXT: rorw %cl, %di # sched: [3:1.50] -; SKYLAKE-NEXT: rolw %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: rorw %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rol_ror_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: rolw %di # sched: [2:1.00] -; SKX-NEXT: rorw %di # sched: [2:1.00] -; SKX-NEXT: rolw (%rdx) # sched: [7:1.00] -; SKX-NEXT: rorw (%rdx) # sched: [7:1.00] -; SKX-NEXT: rolw $7, %di # sched: [2:1.00] -; SKX-NEXT: rorw $7, %di # sched: [2:1.00] -; SKX-NEXT: rolw $7, (%rdx) # sched: [7:1.00] -; SKX-NEXT: rorw $7, (%rdx) # sched: [7:1.00] -; SKX-NEXT: rolw %cl, %di # sched: [3:1.50] -; SKX-NEXT: rorw %cl, %di # sched: [3:1.50] -; SKX-NEXT: rolw %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: rorw %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_rol_ror_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: rolw %di # sched: [1:0.50] -; BDVER2-NEXT: rorw %di # sched: [1:0.50] -; BDVER2-NEXT: rolw (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rorw (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rolw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: rorw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: rolw $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rorw $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rolw %cl, %di # sched: [1:0.50] -; BDVER2-NEXT: rorw %cl, %di # sched: [1:0.50] -; BDVER2-NEXT: rolw %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rorw %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_rol_ror_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: rolw %di # sched: [1:0.50] -; BTVER2-NEXT: rorw %di # sched: [1:0.50] -; BTVER2-NEXT: rolw (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rorw (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rolw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: rorw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: rolw $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rorw $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rolw %cl, %di # sched: [1:0.50] -; BTVER2-NEXT: rorw %cl, %di # sched: [1:0.50] -; BTVER2-NEXT: rolw %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rorw %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_rol_ror_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: rolw %di # sched: [1:0.25] -; ZNVER1-NEXT: rorw %di # sched: [1:0.25] -; ZNVER1-NEXT: rolw (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rorw (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rolw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: rorw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: rolw $7, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rorw $7, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rolw %cl, %di # sched: [1:0.25] -; ZNVER1-NEXT: rorw %cl, %di # sched: [1:0.25] -; ZNVER1-NEXT: rolw %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rorw %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "rolw $0 \0A\09 rorw $0 \0A\09 rolw $2 \0A\09 rorw $2 \0A\09 rolw $3, $0 \0A\09 rorw $3, $0 \0A\09 rolw $3, $2 \0A\09 rorw $3, $2 \0A\09 rolw %CL, $0 \0A\09 rorw %CL, $0 \0A\09 rolw %CL, $2 \0A\09 rorw %CL, $2", "r,r,*m,i"(i16 %a0, i16 %a1, i16 *%a2, i8 7) - ret void -} -define void @test_rol_ror_32(i32 %a0, i32 %a1, i32 *%a2) optsize { -; GENERIC-LABEL: test_rol_ror_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: roll %edi # sched: [2:1.00] -; GENERIC-NEXT: rorl %edi # sched: [2:1.00] -; GENERIC-NEXT: roll (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: rorl (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: roll $7, %edi # sched: [2:1.00] -; GENERIC-NEXT: rorl $7, %edi # sched: [2:1.00] -; GENERIC-NEXT: roll $7, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: rorl $7, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: roll %cl, %edi # sched: [3:1.50] -; GENERIC-NEXT: rorl %cl, %edi # sched: [3:1.50] -; GENERIC-NEXT: roll %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: rorl %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_rol_ror_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: roll %edi # sched: [1:1.00] -; ATOM-NEXT: rorl %edi # sched: [1:1.00] -; ATOM-NEXT: roll (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rorl (%rdx) # sched: [1:1.00] -; ATOM-NEXT: roll $7, %edi # sched: [1:1.00] -; ATOM-NEXT: rorl $7, %edi # sched: [1:1.00] -; ATOM-NEXT: roll $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rorl $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: roll %cl, %edi # sched: [1:1.00] -; ATOM-NEXT: rorl %cl, %edi # sched: [1:1.00] -; ATOM-NEXT: roll %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rorl %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_rol_ror_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: roll %edi # sched: [1:1.00] -; SLM-NEXT: rorl %edi # sched: [1:1.00] -; SLM-NEXT: roll (%rdx) # sched: [4:2.00] -; SLM-NEXT: rorl (%rdx) # sched: [4:2.00] -; SLM-NEXT: roll $7, %edi # sched: [1:1.00] -; SLM-NEXT: rorl $7, %edi # sched: [1:1.00] -; SLM-NEXT: roll $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rorl $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: roll %cl, %edi # sched: [1:1.00] -; SLM-NEXT: rorl %cl, %edi # sched: [1:1.00] -; SLM-NEXT: roll %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rorl %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_rol_ror_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: roll %edi # sched: [2:1.00] -; SANDY-NEXT: rorl %edi # sched: [2:1.00] -; SANDY-NEXT: roll (%rdx) # sched: [8:1.00] -; SANDY-NEXT: rorl (%rdx) # sched: [8:1.00] -; SANDY-NEXT: roll $7, %edi # sched: [2:1.00] -; SANDY-NEXT: rorl $7, %edi # sched: [2:1.00] -; SANDY-NEXT: roll $7, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: rorl $7, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: roll %cl, %edi # sched: [3:1.50] -; SANDY-NEXT: rorl %cl, %edi # sched: [3:1.50] -; SANDY-NEXT: roll %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: rorl %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rol_ror_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: roll %edi # sched: [2:1.00] -; HASWELL-NEXT: rorl %edi # sched: [2:1.00] -; HASWELL-NEXT: roll (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: rorl (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: roll $7, %edi # sched: [2:1.00] -; HASWELL-NEXT: rorl $7, %edi # sched: [2:1.00] -; HASWELL-NEXT: roll $7, (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: rorl $7, (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: roll %cl, %edi # sched: [3:1.00] -; HASWELL-NEXT: rorl %cl, %edi # sched: [3:1.00] -; HASWELL-NEXT: roll %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: rorl %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rol_ror_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: roll %edi # sched: [2:1.00] -; BROADWELL-NEXT: rorl %edi # sched: [2:1.00] -; BROADWELL-NEXT: roll (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: rorl (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: roll $7, %edi # sched: [2:1.00] -; BROADWELL-NEXT: rorl $7, %edi # sched: [2:1.00] -; BROADWELL-NEXT: roll $7, (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: rorl $7, (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: roll %cl, %edi # sched: [3:1.00] -; BROADWELL-NEXT: rorl %cl, %edi # sched: [3:1.00] -; BROADWELL-NEXT: roll %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: rorl %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rol_ror_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: roll %edi # sched: [2:1.00] -; SKYLAKE-NEXT: rorl %edi # sched: [2:1.00] -; SKYLAKE-NEXT: roll (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: rorl (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: roll $7, %edi # sched: [2:1.00] -; SKYLAKE-NEXT: rorl $7, %edi # sched: [2:1.00] -; SKYLAKE-NEXT: roll $7, (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: rorl $7, (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: roll %cl, %edi # sched: [3:1.50] -; SKYLAKE-NEXT: rorl %cl, %edi # sched: [3:1.50] -; SKYLAKE-NEXT: roll %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: rorl %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rol_ror_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: roll %edi # sched: [2:1.00] -; SKX-NEXT: rorl %edi # sched: [2:1.00] -; SKX-NEXT: roll (%rdx) # sched: [7:1.00] -; SKX-NEXT: rorl (%rdx) # sched: [7:1.00] -; SKX-NEXT: roll $7, %edi # sched: [2:1.00] -; SKX-NEXT: rorl $7, %edi # sched: [2:1.00] -; SKX-NEXT: roll $7, (%rdx) # sched: [7:1.00] -; SKX-NEXT: rorl $7, (%rdx) # sched: [7:1.00] -; SKX-NEXT: roll %cl, %edi # sched: [3:1.50] -; SKX-NEXT: rorl %cl, %edi # sched: [3:1.50] -; SKX-NEXT: roll %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: rorl %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_rol_ror_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: roll %edi # sched: [1:0.50] -; BDVER2-NEXT: rorl %edi # sched: [1:0.50] -; BDVER2-NEXT: roll (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rorl (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: roll $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: rorl $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: roll $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rorl $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: roll %cl, %edi # sched: [1:0.50] -; BDVER2-NEXT: rorl %cl, %edi # sched: [1:0.50] -; BDVER2-NEXT: roll %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rorl %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_rol_ror_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: roll %edi # sched: [1:0.50] -; BTVER2-NEXT: rorl %edi # sched: [1:0.50] -; BTVER2-NEXT: roll (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rorl (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: roll $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: rorl $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: roll $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rorl $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: roll %cl, %edi # sched: [1:0.50] -; BTVER2-NEXT: rorl %cl, %edi # sched: [1:0.50] -; BTVER2-NEXT: roll %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rorl %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_rol_ror_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: roll %edi # sched: [1:0.25] -; ZNVER1-NEXT: rorl %edi # sched: [1:0.25] -; ZNVER1-NEXT: roll (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rorl (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: roll $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: rorl $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: roll $7, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rorl $7, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: roll %cl, %edi # sched: [1:0.25] -; ZNVER1-NEXT: rorl %cl, %edi # sched: [1:0.25] -; ZNVER1-NEXT: roll %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rorl %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "roll $0 \0A\09 rorl $0 \0A\09 roll $2 \0A\09 rorl $2 \0A\09 roll $3, $0 \0A\09 rorl $3, $0 \0A\09 roll $3, $2 \0A\09 rorl $3, $2 \0A\09 roll %CL, $0 \0A\09 rorl %CL, $0 \0A\09 roll %CL, $2 \0A\09 rorl %CL, $2", "r,r,*m,i"(i32 %a0, i32 %a1, i32 *%a2, i8 7) - ret void -} -define void @test_rol_ror_64(i64 %a0, i64 %a1, i64 *%a2) optsize { -; GENERIC-LABEL: test_rol_ror_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: rolq %rdi # sched: [2:1.00] -; GENERIC-NEXT: rorq %rdi # sched: [2:1.00] -; GENERIC-NEXT: rolq (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: rorq (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: rolq $7, %rdi # sched: [2:1.00] -; GENERIC-NEXT: rorq $7, %rdi # sched: [2:1.00] -; GENERIC-NEXT: rolq $7, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: rorq $7, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: rolq %cl, %rdi # sched: [3:1.50] -; GENERIC-NEXT: rorq %cl, %rdi # sched: [3:1.50] -; GENERIC-NEXT: rolq %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: rorq %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_rol_ror_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: rolq %rdi # sched: [1:1.00] -; ATOM-NEXT: rorq %rdi # sched: [1:1.00] -; ATOM-NEXT: rolq (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rorq (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rolq $7, %rdi # sched: [1:1.00] -; ATOM-NEXT: rorq $7, %rdi # sched: [1:1.00] -; ATOM-NEXT: rolq $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rorq $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rolq %cl, %rdi # sched: [1:1.00] -; ATOM-NEXT: rorq %cl, %rdi # sched: [1:1.00] -; ATOM-NEXT: rolq %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: rorq %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_rol_ror_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: rolq %rdi # sched: [1:1.00] -; SLM-NEXT: rorq %rdi # sched: [1:1.00] -; SLM-NEXT: rolq (%rdx) # sched: [4:2.00] -; SLM-NEXT: rorq (%rdx) # sched: [4:2.00] -; SLM-NEXT: rolq $7, %rdi # sched: [1:1.00] -; SLM-NEXT: rorq $7, %rdi # sched: [1:1.00] -; SLM-NEXT: rolq $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rorq $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rolq %cl, %rdi # sched: [1:1.00] -; SLM-NEXT: rorq %cl, %rdi # sched: [1:1.00] -; SLM-NEXT: rolq %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: rorq %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_rol_ror_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: rolq %rdi # sched: [2:1.00] -; SANDY-NEXT: rorq %rdi # sched: [2:1.00] -; SANDY-NEXT: rolq (%rdx) # sched: [8:1.00] -; SANDY-NEXT: rorq (%rdx) # sched: [8:1.00] -; SANDY-NEXT: rolq $7, %rdi # sched: [2:1.00] -; SANDY-NEXT: rorq $7, %rdi # sched: [2:1.00] -; SANDY-NEXT: rolq $7, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: rorq $7, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: rolq %cl, %rdi # sched: [3:1.50] -; SANDY-NEXT: rorq %cl, %rdi # sched: [3:1.50] -; SANDY-NEXT: rolq %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: rorq %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_rol_ror_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: rolq %rdi # sched: [2:1.00] -; HASWELL-NEXT: rorq %rdi # sched: [2:1.00] -; HASWELL-NEXT: rolq (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: rorq (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: rolq $7, %rdi # sched: [2:1.00] -; HASWELL-NEXT: rorq $7, %rdi # sched: [2:1.00] -; HASWELL-NEXT: rolq $7, (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: rorq $7, (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: rolq %cl, %rdi # sched: [3:1.00] -; HASWELL-NEXT: rorq %cl, %rdi # sched: [3:1.00] -; HASWELL-NEXT: rolq %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: rorq %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rol_ror_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: rolq %rdi # sched: [2:1.00] -; BROADWELL-NEXT: rorq %rdi # sched: [2:1.00] -; BROADWELL-NEXT: rolq (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: rorq (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: rolq $7, %rdi # sched: [2:1.00] -; BROADWELL-NEXT: rorq $7, %rdi # sched: [2:1.00] -; BROADWELL-NEXT: rolq $7, (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: rorq $7, (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: rolq %cl, %rdi # sched: [3:1.00] -; BROADWELL-NEXT: rorq %cl, %rdi # sched: [3:1.00] -; BROADWELL-NEXT: rolq %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: rorq %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rol_ror_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: rolq %rdi # sched: [2:1.00] -; SKYLAKE-NEXT: rorq %rdi # sched: [2:1.00] -; SKYLAKE-NEXT: rolq (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: rorq (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: rolq $7, %rdi # sched: [2:1.00] -; SKYLAKE-NEXT: rorq $7, %rdi # sched: [2:1.00] -; SKYLAKE-NEXT: rolq $7, (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: rorq $7, (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: rolq %cl, %rdi # sched: [3:1.50] -; SKYLAKE-NEXT: rorq %cl, %rdi # sched: [3:1.50] -; SKYLAKE-NEXT: rolq %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: rorq %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rol_ror_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: rolq %rdi # sched: [2:1.00] -; SKX-NEXT: rorq %rdi # sched: [2:1.00] -; SKX-NEXT: rolq (%rdx) # sched: [7:1.00] -; SKX-NEXT: rorq (%rdx) # sched: [7:1.00] -; SKX-NEXT: rolq $7, %rdi # sched: [2:1.00] -; SKX-NEXT: rorq $7, %rdi # sched: [2:1.00] -; SKX-NEXT: rolq $7, (%rdx) # sched: [7:1.00] -; SKX-NEXT: rorq $7, (%rdx) # sched: [7:1.00] -; SKX-NEXT: rolq %cl, %rdi # sched: [3:1.50] -; SKX-NEXT: rorq %cl, %rdi # sched: [3:1.50] -; SKX-NEXT: rolq %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: rorq %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_rol_ror_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: rolq %rdi # sched: [1:0.50] -; BDVER2-NEXT: rorq %rdi # sched: [1:0.50] -; BDVER2-NEXT: rolq (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rorq (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rolq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: rorq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: rolq $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rorq $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rolq %cl, %rdi # sched: [1:0.50] -; BDVER2-NEXT: rorq %cl, %rdi # sched: [1:0.50] -; BDVER2-NEXT: rolq %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: rorq %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_rol_ror_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: rolq %rdi # sched: [1:0.50] -; BTVER2-NEXT: rorq %rdi # sched: [1:0.50] -; BTVER2-NEXT: rolq (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rorq (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rolq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: rorq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: rolq $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rorq $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rolq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: rorq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: rolq %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: rorq %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_rol_ror_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: rolq %rdi # sched: [1:0.25] -; ZNVER1-NEXT: rorq %rdi # sched: [1:0.25] -; ZNVER1-NEXT: rolq (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rorq (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rolq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: rorq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: rolq $7, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rorq $7, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rolq %cl, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: rorq %cl, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: rolq %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: rorq %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "rolq $0 \0A\09 rorq $0 \0A\09 rolq $2 \0A\09 rorq $2 \0A\09 rolq $3, $0 \0A\09 rorq $3, $0 \0A\09 rolq $3, $2 \0A\09 rorq $3, $2 \0A\09 rolq %CL, $0 \0A\09 rorq %CL, $0 \0A\09 rolq %CL, $2 \0A\09 rorq %CL, $2", "r,r,*m,i"(i64 %a0, i64 %a1, i64 *%a2, i8 7) - ret void -} - -define void @test_sar_shl_shr_8(i8 %a0, i8 %a1, i8 *%a2) optsize { -; GENERIC-LABEL: test_sar_shl_shr_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: sarb %dil # sched: [1:0.50] -; GENERIC-NEXT: shlb %dil # sched: [1:0.50] -; GENERIC-NEXT: shrb %dil # sched: [1:0.50] -; GENERIC-NEXT: sarb (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shlb (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shrb (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: sarb $7, %dil # sched: [1:0.50] -; GENERIC-NEXT: shlb $7, %dil # sched: [1:0.50] -; GENERIC-NEXT: shrb $7, %dil # sched: [1:0.50] -; GENERIC-NEXT: sarb $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shlb $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shrb $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: sarb %cl, %dil # sched: [3:1.50] -; GENERIC-NEXT: shlb %cl, %dil # sched: [3:1.50] -; GENERIC-NEXT: shrb %cl, %dil # sched: [3:1.50] -; GENERIC-NEXT: sarb %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: shlb %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: shrb %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sar_shl_shr_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: sarb %dil # sched: [1:1.00] -; ATOM-NEXT: shlb %dil # sched: [1:1.00] -; ATOM-NEXT: shrb %dil # sched: [1:1.00] -; ATOM-NEXT: sarb (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shlb (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shrb (%rdx) # sched: [1:1.00] -; ATOM-NEXT: sarb $7, %dil # sched: [1:1.00] -; ATOM-NEXT: shlb $7, %dil # sched: [1:1.00] -; ATOM-NEXT: shrb $7, %dil # sched: [1:1.00] -; ATOM-NEXT: sarb $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shlb $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shrb $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: sarb %cl, %dil # sched: [1:1.00] -; ATOM-NEXT: shlb %cl, %dil # sched: [1:1.00] -; ATOM-NEXT: shrb %cl, %dil # sched: [1:1.00] -; ATOM-NEXT: sarb %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shlb %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shrb %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sar_shl_shr_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: sarb %dil # sched: [1:1.00] -; SLM-NEXT: shlb %dil # sched: [1:1.00] -; SLM-NEXT: shrb %dil # sched: [1:1.00] -; SLM-NEXT: sarb (%rdx) # sched: [4:2.00] -; SLM-NEXT: shlb (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrb (%rdx) # sched: [4:2.00] -; SLM-NEXT: sarb $7, %dil # sched: [1:1.00] -; SLM-NEXT: shlb $7, %dil # sched: [1:1.00] -; SLM-NEXT: shrb $7, %dil # sched: [1:1.00] -; SLM-NEXT: sarb $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shlb $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrb $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: sarb %cl, %dil # sched: [1:1.00] -; SLM-NEXT: shlb %cl, %dil # sched: [1:1.00] -; SLM-NEXT: shrb %cl, %dil # sched: [1:1.00] -; SLM-NEXT: sarb %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shlb %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrb %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_sar_shl_shr_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: sarb %dil # sched: [1:0.50] -; SANDY-NEXT: shlb %dil # sched: [1:0.50] -; SANDY-NEXT: shrb %dil # sched: [1:0.50] -; SANDY-NEXT: sarb (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shlb (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shrb (%rdx) # sched: [7:1.00] -; SANDY-NEXT: sarb $7, %dil # sched: [1:0.50] -; SANDY-NEXT: shlb $7, %dil # sched: [1:0.50] -; SANDY-NEXT: shrb $7, %dil # sched: [1:0.50] -; SANDY-NEXT: sarb $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shlb $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shrb $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: sarb %cl, %dil # sched: [3:1.50] -; SANDY-NEXT: shlb %cl, %dil # sched: [3:1.50] -; SANDY-NEXT: shrb %cl, %dil # sched: [3:1.50] -; SANDY-NEXT: sarb %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: shlb %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: shrb %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sar_shl_shr_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: sarb %dil # sched: [1:0.50] -; HASWELL-NEXT: shlb %dil # sched: [1:0.50] -; HASWELL-NEXT: shrb %dil # sched: [1:0.50] -; HASWELL-NEXT: sarb (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shlb (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shrb (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: sarb $7, %dil # sched: [1:0.50] -; HASWELL-NEXT: shlb $7, %dil # sched: [1:0.50] -; HASWELL-NEXT: shrb $7, %dil # sched: [1:0.50] -; HASWELL-NEXT: sarb $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shlb $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shrb $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: sarb %cl, %dil # sched: [3:1.00] -; HASWELL-NEXT: shlb %cl, %dil # sched: [3:1.00] -; HASWELL-NEXT: shrb %cl, %dil # sched: [3:1.00] -; HASWELL-NEXT: sarb %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: shlb %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: shrb %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sar_shl_shr_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: sarb %dil # sched: [1:0.50] -; BROADWELL-NEXT: shlb %dil # sched: [1:0.50] -; BROADWELL-NEXT: shrb %dil # sched: [1:0.50] -; BROADWELL-NEXT: sarb (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shlb (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shrb (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: sarb $7, %dil # sched: [1:0.50] -; BROADWELL-NEXT: shlb $7, %dil # sched: [1:0.50] -; BROADWELL-NEXT: shrb $7, %dil # sched: [1:0.50] -; BROADWELL-NEXT: sarb $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shlb $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shrb $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: sarb %cl, %dil # sched: [3:1.00] -; BROADWELL-NEXT: shlb %cl, %dil # sched: [3:1.00] -; BROADWELL-NEXT: shrb %cl, %dil # sched: [3:1.00] -; BROADWELL-NEXT: sarb %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: shlb %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: shrb %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sar_shl_shr_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: sarb %dil # sched: [1:0.50] -; SKYLAKE-NEXT: shlb %dil # sched: [1:0.50] -; SKYLAKE-NEXT: shrb %dil # sched: [1:0.50] -; SKYLAKE-NEXT: sarb (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shlb (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shrb (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: sarb $7, %dil # sched: [1:0.50] -; SKYLAKE-NEXT: shlb $7, %dil # sched: [1:0.50] -; SKYLAKE-NEXT: shrb $7, %dil # sched: [1:0.50] -; SKYLAKE-NEXT: sarb $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shlb $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shrb $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: sarb %cl, %dil # sched: [3:1.50] -; SKYLAKE-NEXT: shlb %cl, %dil # sched: [3:1.50] -; SKYLAKE-NEXT: shrb %cl, %dil # sched: [3:1.50] -; SKYLAKE-NEXT: sarb %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: shlb %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: shrb %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sar_shl_shr_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: sarb %dil # sched: [1:0.50] -; SKX-NEXT: shlb %dil # sched: [1:0.50] -; SKX-NEXT: shrb %dil # sched: [1:0.50] -; SKX-NEXT: sarb (%rdx) # sched: [6:1.00] -; SKX-NEXT: shlb (%rdx) # sched: [6:1.00] -; SKX-NEXT: shrb (%rdx) # sched: [6:1.00] -; SKX-NEXT: sarb $7, %dil # sched: [1:0.50] -; SKX-NEXT: shlb $7, %dil # sched: [1:0.50] -; SKX-NEXT: shrb $7, %dil # sched: [1:0.50] -; SKX-NEXT: sarb $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: shlb $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: shrb $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: sarb %cl, %dil # sched: [3:1.50] -; SKX-NEXT: shlb %cl, %dil # sched: [3:1.50] -; SKX-NEXT: shrb %cl, %dil # sched: [3:1.50] -; SKX-NEXT: sarb %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: shlb %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: shrb %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_sar_shl_shr_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: sarb %dil # sched: [1:0.50] -; BDVER2-NEXT: shlb %dil # sched: [1:0.50] -; BDVER2-NEXT: shrb %dil # sched: [1:0.50] -; BDVER2-NEXT: sarb (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shlb (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shrb (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: sarb $7, %dil # sched: [1:0.50] -; BDVER2-NEXT: shlb $7, %dil # sched: [1:0.50] -; BDVER2-NEXT: shrb $7, %dil # sched: [1:0.50] -; BDVER2-NEXT: sarb $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shlb $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shrb $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: sarb %cl, %dil # sched: [1:0.50] -; BDVER2-NEXT: shlb %cl, %dil # sched: [1:0.50] -; BDVER2-NEXT: shrb %cl, %dil # sched: [1:0.50] -; BDVER2-NEXT: sarb %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shlb %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shrb %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_sar_shl_shr_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: sarb %dil # sched: [1:0.50] -; BTVER2-NEXT: shlb %dil # sched: [1:0.50] -; BTVER2-NEXT: shrb %dil # sched: [1:0.50] -; BTVER2-NEXT: sarb (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shlb (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shrb (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: sarb $7, %dil # sched: [1:0.50] -; BTVER2-NEXT: shlb $7, %dil # sched: [1:0.50] -; BTVER2-NEXT: shrb $7, %dil # sched: [1:0.50] -; BTVER2-NEXT: sarb $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shlb $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shrb $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: sarb %cl, %dil # sched: [1:0.50] -; BTVER2-NEXT: shlb %cl, %dil # sched: [1:0.50] -; BTVER2-NEXT: shrb %cl, %dil # sched: [1:0.50] -; BTVER2-NEXT: sarb %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shlb %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shrb %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_sar_shl_shr_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: sarb %dil # sched: [1:0.25] -; ZNVER1-NEXT: shlb %dil # sched: [1:0.25] -; ZNVER1-NEXT: shrb %dil # sched: [1:0.25] -; ZNVER1-NEXT: sarb (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shlb (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shrb (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: sarb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: shlb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: shrb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: sarb $7, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shlb $7, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shrb $7, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: sarb %cl, %dil # sched: [1:0.25] -; ZNVER1-NEXT: shlb %cl, %dil # sched: [1:0.25] -; ZNVER1-NEXT: shrb %cl, %dil # sched: [1:0.25] -; ZNVER1-NEXT: sarb %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: shlb %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: shrb %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "sarb $0 \0A\09 shlb $0 \0A\09 shrb $0 \0A\09 sarb $2 \0A\09 shlb $2 \0A\09 shrb $2 \0A\09 sarb $3, $0 \0A\09 shlb $3, $0 \0A\09 shrb $3, $0 \0A\09 sarb $3, $2 \0A\09 shlb $3, $2 \0A\09 shrb $3, $2 \0A\09 sarb %CL, $0 \0A\09 shlb %CL, $0 \0A\09 shrb %CL, $0 \0A\09 sarb %CL, $2 \0A\09 shlb %CL, $2 \0A\09 shrb %CL, $2", "r,r,*m,i"(i8 %a0, i8 %a1, i8 *%a2, i8 7) - ret void -} -define void @test_sar_shl_shr_16(i16 %a0, i16 %a1, i16 *%a2) optsize { -; GENERIC-LABEL: test_sar_shl_shr_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: sarw %di # sched: [1:0.50] -; GENERIC-NEXT: shlw %di # sched: [1:0.50] -; GENERIC-NEXT: shrw %di # sched: [1:0.50] -; GENERIC-NEXT: sarw (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shlw (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shrw (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: sarw $7, %di # sched: [1:0.50] -; GENERIC-NEXT: shlw $7, %di # sched: [1:0.50] -; GENERIC-NEXT: shrw $7, %di # sched: [1:0.50] -; GENERIC-NEXT: sarw $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shlw $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shrw $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: sarw %cl, %di # sched: [3:1.50] -; GENERIC-NEXT: shlw %cl, %di # sched: [3:1.50] -; GENERIC-NEXT: shrw %cl, %di # sched: [3:1.50] -; GENERIC-NEXT: sarw %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: shlw %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: shrw %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sar_shl_shr_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: sarw %di # sched: [1:1.00] -; ATOM-NEXT: shlw %di # sched: [1:1.00] -; ATOM-NEXT: shrw %di # sched: [1:1.00] -; ATOM-NEXT: sarw (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shlw (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shrw (%rdx) # sched: [1:1.00] -; ATOM-NEXT: sarw $7, %di # sched: [1:1.00] -; ATOM-NEXT: shlw $7, %di # sched: [1:1.00] -; ATOM-NEXT: shrw $7, %di # sched: [1:1.00] -; ATOM-NEXT: sarw $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shlw $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shrw $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: sarw %cl, %di # sched: [1:1.00] -; ATOM-NEXT: shlw %cl, %di # sched: [1:1.00] -; ATOM-NEXT: shrw %cl, %di # sched: [1:1.00] -; ATOM-NEXT: sarw %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shlw %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shrw %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sar_shl_shr_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: sarw %di # sched: [1:1.00] -; SLM-NEXT: shlw %di # sched: [1:1.00] -; SLM-NEXT: shrw %di # sched: [1:1.00] -; SLM-NEXT: sarw (%rdx) # sched: [4:2.00] -; SLM-NEXT: shlw (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrw (%rdx) # sched: [4:2.00] -; SLM-NEXT: sarw $7, %di # sched: [1:1.00] -; SLM-NEXT: shlw $7, %di # sched: [1:1.00] -; SLM-NEXT: shrw $7, %di # sched: [1:1.00] -; SLM-NEXT: sarw $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shlw $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrw $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: sarw %cl, %di # sched: [1:1.00] -; SLM-NEXT: shlw %cl, %di # sched: [1:1.00] -; SLM-NEXT: shrw %cl, %di # sched: [1:1.00] -; SLM-NEXT: sarw %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shlw %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrw %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_sar_shl_shr_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: sarw %di # sched: [1:0.50] -; SANDY-NEXT: shlw %di # sched: [1:0.50] -; SANDY-NEXT: shrw %di # sched: [1:0.50] -; SANDY-NEXT: sarw (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shlw (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shrw (%rdx) # sched: [7:1.00] -; SANDY-NEXT: sarw $7, %di # sched: [1:0.50] -; SANDY-NEXT: shlw $7, %di # sched: [1:0.50] -; SANDY-NEXT: shrw $7, %di # sched: [1:0.50] -; SANDY-NEXT: sarw $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shlw $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shrw $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: sarw %cl, %di # sched: [3:1.50] -; SANDY-NEXT: shlw %cl, %di # sched: [3:1.50] -; SANDY-NEXT: shrw %cl, %di # sched: [3:1.50] -; SANDY-NEXT: sarw %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: shlw %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: shrw %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sar_shl_shr_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: sarw %di # sched: [1:0.50] -; HASWELL-NEXT: shlw %di # sched: [1:0.50] -; HASWELL-NEXT: shrw %di # sched: [1:0.50] -; HASWELL-NEXT: sarw (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shlw (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shrw (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: sarw $7, %di # sched: [1:0.50] -; HASWELL-NEXT: shlw $7, %di # sched: [1:0.50] -; HASWELL-NEXT: shrw $7, %di # sched: [1:0.50] -; HASWELL-NEXT: sarw $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shlw $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shrw $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: sarw %cl, %di # sched: [3:1.00] -; HASWELL-NEXT: shlw %cl, %di # sched: [3:1.00] -; HASWELL-NEXT: shrw %cl, %di # sched: [3:1.00] -; HASWELL-NEXT: sarw %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: shlw %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: shrw %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sar_shl_shr_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: sarw %di # sched: [1:0.50] -; BROADWELL-NEXT: shlw %di # sched: [1:0.50] -; BROADWELL-NEXT: shrw %di # sched: [1:0.50] -; BROADWELL-NEXT: sarw (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shlw (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shrw (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: sarw $7, %di # sched: [1:0.50] -; BROADWELL-NEXT: shlw $7, %di # sched: [1:0.50] -; BROADWELL-NEXT: shrw $7, %di # sched: [1:0.50] -; BROADWELL-NEXT: sarw $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shlw $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shrw $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: sarw %cl, %di # sched: [3:1.00] -; BROADWELL-NEXT: shlw %cl, %di # sched: [3:1.00] -; BROADWELL-NEXT: shrw %cl, %di # sched: [3:1.00] -; BROADWELL-NEXT: sarw %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: shlw %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: shrw %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sar_shl_shr_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: sarw %di # sched: [1:0.50] -; SKYLAKE-NEXT: shlw %di # sched: [1:0.50] -; SKYLAKE-NEXT: shrw %di # sched: [1:0.50] -; SKYLAKE-NEXT: sarw (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shlw (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shrw (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: sarw $7, %di # sched: [1:0.50] -; SKYLAKE-NEXT: shlw $7, %di # sched: [1:0.50] -; SKYLAKE-NEXT: shrw $7, %di # sched: [1:0.50] -; SKYLAKE-NEXT: sarw $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shlw $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shrw $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: sarw %cl, %di # sched: [3:1.50] -; SKYLAKE-NEXT: shlw %cl, %di # sched: [3:1.50] -; SKYLAKE-NEXT: shrw %cl, %di # sched: [3:1.50] -; SKYLAKE-NEXT: sarw %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: shlw %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: shrw %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sar_shl_shr_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: sarw %di # sched: [1:0.50] -; SKX-NEXT: shlw %di # sched: [1:0.50] -; SKX-NEXT: shrw %di # sched: [1:0.50] -; SKX-NEXT: sarw (%rdx) # sched: [6:1.00] -; SKX-NEXT: shlw (%rdx) # sched: [6:1.00] -; SKX-NEXT: shrw (%rdx) # sched: [6:1.00] -; SKX-NEXT: sarw $7, %di # sched: [1:0.50] -; SKX-NEXT: shlw $7, %di # sched: [1:0.50] -; SKX-NEXT: shrw $7, %di # sched: [1:0.50] -; SKX-NEXT: sarw $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: shlw $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: shrw $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: sarw %cl, %di # sched: [3:1.50] -; SKX-NEXT: shlw %cl, %di # sched: [3:1.50] -; SKX-NEXT: shrw %cl, %di # sched: [3:1.50] -; SKX-NEXT: sarw %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: shlw %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: shrw %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_sar_shl_shr_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: sarw %di # sched: [1:0.50] -; BDVER2-NEXT: shlw %di # sched: [1:0.50] -; BDVER2-NEXT: shrw %di # sched: [1:0.50] -; BDVER2-NEXT: sarw (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shlw (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shrw (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: sarw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: shlw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: shrw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: sarw $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shlw $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shrw $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: sarw %cl, %di # sched: [1:0.50] -; BDVER2-NEXT: shlw %cl, %di # sched: [1:0.50] -; BDVER2-NEXT: shrw %cl, %di # sched: [1:0.50] -; BDVER2-NEXT: sarw %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shlw %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shrw %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_sar_shl_shr_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: sarw %di # sched: [1:0.50] -; BTVER2-NEXT: shlw %di # sched: [1:0.50] -; BTVER2-NEXT: shrw %di # sched: [1:0.50] -; BTVER2-NEXT: sarw (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shlw (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shrw (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: sarw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: shlw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: shrw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: sarw $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shlw $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shrw $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: sarw %cl, %di # sched: [1:0.50] -; BTVER2-NEXT: shlw %cl, %di # sched: [1:0.50] -; BTVER2-NEXT: shrw %cl, %di # sched: [1:0.50] -; BTVER2-NEXT: sarw %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shlw %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shrw %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_sar_shl_shr_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: sarw %di # sched: [1:0.25] -; ZNVER1-NEXT: shlw %di # sched: [1:0.25] -; ZNVER1-NEXT: shrw %di # sched: [1:0.25] -; ZNVER1-NEXT: sarw (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shlw (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shrw (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: sarw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: shlw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: shrw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: sarw $7, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shlw $7, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shrw $7, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: sarw %cl, %di # sched: [1:0.25] -; ZNVER1-NEXT: shlw %cl, %di # sched: [1:0.25] -; ZNVER1-NEXT: shrw %cl, %di # sched: [1:0.25] -; ZNVER1-NEXT: sarw %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: shlw %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: shrw %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "sarw $0 \0A\09 shlw $0 \0A\09 shrw $0 \0A\09 sarw $2 \0A\09 shlw $2 \0A\09 shrw $2 \0A\09 sarw $3, $0 \0A\09 shlw $3, $0 \0A\09 shrw $3, $0 \0A\09 sarw $3, $2 \0A\09 shlw $3, $2 \0A\09 shrw $3, $2 \0A\09 sarw %CL, $0 \0A\09 shlw %CL, $0 \0A\09 shrw %CL, $0 \0A\09 sarw %CL, $2 \0A\09 shlw %CL, $2 \0A\09 shrw %CL, $2", "r,r,*m,i"(i16 %a0, i16 %a1, i16 *%a2, i8 7) - ret void -} -define void @test_sar_shl_shr_32(i32 %a0, i32 %a1, i32 *%a2) optsize { -; GENERIC-LABEL: test_sar_shl_shr_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: sarl %edi # sched: [1:0.50] -; GENERIC-NEXT: shll %edi # sched: [1:0.50] -; GENERIC-NEXT: shrl %edi # sched: [1:0.50] -; GENERIC-NEXT: sarl (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shll (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shrl (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: sarl $7, %edi # sched: [1:0.50] -; GENERIC-NEXT: shll $7, %edi # sched: [1:0.50] -; GENERIC-NEXT: shrl $7, %edi # sched: [1:0.50] -; GENERIC-NEXT: sarl $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shll $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shrl $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: sarl %cl, %edi # sched: [3:1.50] -; GENERIC-NEXT: shll %cl, %edi # sched: [3:1.50] -; GENERIC-NEXT: shrl %cl, %edi # sched: [3:1.50] -; GENERIC-NEXT: sarl %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: shll %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: shrl %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sar_shl_shr_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: sarl %edi # sched: [1:1.00] -; ATOM-NEXT: shll %edi # sched: [1:1.00] -; ATOM-NEXT: shrl %edi # sched: [1:1.00] -; ATOM-NEXT: sarl (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shll (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shrl (%rdx) # sched: [1:1.00] -; ATOM-NEXT: sarl $7, %edi # sched: [1:1.00] -; ATOM-NEXT: shll $7, %edi # sched: [1:1.00] -; ATOM-NEXT: shrl $7, %edi # sched: [1:1.00] -; ATOM-NEXT: sarl $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shll $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shrl $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: sarl %cl, %edi # sched: [1:1.00] -; ATOM-NEXT: shll %cl, %edi # sched: [1:1.00] -; ATOM-NEXT: shrl %cl, %edi # sched: [1:1.00] -; ATOM-NEXT: sarl %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shll %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shrl %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sar_shl_shr_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: sarl %edi # sched: [1:1.00] -; SLM-NEXT: shll %edi # sched: [1:1.00] -; SLM-NEXT: shrl %edi # sched: [1:1.00] -; SLM-NEXT: sarl (%rdx) # sched: [4:2.00] -; SLM-NEXT: shll (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrl (%rdx) # sched: [4:2.00] -; SLM-NEXT: sarl $7, %edi # sched: [1:1.00] -; SLM-NEXT: shll $7, %edi # sched: [1:1.00] -; SLM-NEXT: shrl $7, %edi # sched: [1:1.00] -; SLM-NEXT: sarl $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shll $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrl $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: sarl %cl, %edi # sched: [1:1.00] -; SLM-NEXT: shll %cl, %edi # sched: [1:1.00] -; SLM-NEXT: shrl %cl, %edi # sched: [1:1.00] -; SLM-NEXT: sarl %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shll %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrl %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_sar_shl_shr_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: sarl %edi # sched: [1:0.50] -; SANDY-NEXT: shll %edi # sched: [1:0.50] -; SANDY-NEXT: shrl %edi # sched: [1:0.50] -; SANDY-NEXT: sarl (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shll (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shrl (%rdx) # sched: [7:1.00] -; SANDY-NEXT: sarl $7, %edi # sched: [1:0.50] -; SANDY-NEXT: shll $7, %edi # sched: [1:0.50] -; SANDY-NEXT: shrl $7, %edi # sched: [1:0.50] -; SANDY-NEXT: sarl $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shll $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shrl $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: sarl %cl, %edi # sched: [3:1.50] -; SANDY-NEXT: shll %cl, %edi # sched: [3:1.50] -; SANDY-NEXT: shrl %cl, %edi # sched: [3:1.50] -; SANDY-NEXT: sarl %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: shll %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: shrl %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sar_shl_shr_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: sarl %edi # sched: [1:0.50] -; HASWELL-NEXT: shll %edi # sched: [1:0.50] -; HASWELL-NEXT: shrl %edi # sched: [1:0.50] -; HASWELL-NEXT: sarl (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shll (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shrl (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: sarl $7, %edi # sched: [1:0.50] -; HASWELL-NEXT: shll $7, %edi # sched: [1:0.50] -; HASWELL-NEXT: shrl $7, %edi # sched: [1:0.50] -; HASWELL-NEXT: sarl $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shll $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shrl $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: sarl %cl, %edi # sched: [3:1.00] -; HASWELL-NEXT: shll %cl, %edi # sched: [3:1.00] -; HASWELL-NEXT: shrl %cl, %edi # sched: [3:1.00] -; HASWELL-NEXT: sarl %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: shll %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: shrl %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sar_shl_shr_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: sarl %edi # sched: [1:0.50] -; BROADWELL-NEXT: shll %edi # sched: [1:0.50] -; BROADWELL-NEXT: shrl %edi # sched: [1:0.50] -; BROADWELL-NEXT: sarl (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shll (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shrl (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: sarl $7, %edi # sched: [1:0.50] -; BROADWELL-NEXT: shll $7, %edi # sched: [1:0.50] -; BROADWELL-NEXT: shrl $7, %edi # sched: [1:0.50] -; BROADWELL-NEXT: sarl $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shll $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shrl $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: sarl %cl, %edi # sched: [3:1.00] -; BROADWELL-NEXT: shll %cl, %edi # sched: [3:1.00] -; BROADWELL-NEXT: shrl %cl, %edi # sched: [3:1.00] -; BROADWELL-NEXT: sarl %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: shll %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: shrl %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sar_shl_shr_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: sarl %edi # sched: [1:0.50] -; SKYLAKE-NEXT: shll %edi # sched: [1:0.50] -; SKYLAKE-NEXT: shrl %edi # sched: [1:0.50] -; SKYLAKE-NEXT: sarl (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shll (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shrl (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: sarl $7, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: shll $7, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: shrl $7, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: sarl $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shll $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shrl $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: sarl %cl, %edi # sched: [3:1.50] -; SKYLAKE-NEXT: shll %cl, %edi # sched: [3:1.50] -; SKYLAKE-NEXT: shrl %cl, %edi # sched: [3:1.50] -; SKYLAKE-NEXT: sarl %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: shll %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: shrl %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sar_shl_shr_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: sarl %edi # sched: [1:0.50] -; SKX-NEXT: shll %edi # sched: [1:0.50] -; SKX-NEXT: shrl %edi # sched: [1:0.50] -; SKX-NEXT: sarl (%rdx) # sched: [6:1.00] -; SKX-NEXT: shll (%rdx) # sched: [6:1.00] -; SKX-NEXT: shrl (%rdx) # sched: [6:1.00] -; SKX-NEXT: sarl $7, %edi # sched: [1:0.50] -; SKX-NEXT: shll $7, %edi # sched: [1:0.50] -; SKX-NEXT: shrl $7, %edi # sched: [1:0.50] -; SKX-NEXT: sarl $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: shll $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: shrl $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: sarl %cl, %edi # sched: [3:1.50] -; SKX-NEXT: shll %cl, %edi # sched: [3:1.50] -; SKX-NEXT: shrl %cl, %edi # sched: [3:1.50] -; SKX-NEXT: sarl %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: shll %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: shrl %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_sar_shl_shr_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: sarl %edi # sched: [1:0.50] -; BDVER2-NEXT: shll %edi # sched: [1:0.50] -; BDVER2-NEXT: shrl %edi # sched: [1:0.50] -; BDVER2-NEXT: sarl (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shll (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shrl (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: sarl $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: shll $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: shrl $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: sarl $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shll $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shrl $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: sarl %cl, %edi # sched: [1:0.50] -; BDVER2-NEXT: shll %cl, %edi # sched: [1:0.50] -; BDVER2-NEXT: shrl %cl, %edi # sched: [1:0.50] -; BDVER2-NEXT: sarl %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shll %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shrl %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_sar_shl_shr_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: sarl %edi # sched: [1:0.50] -; BTVER2-NEXT: shll %edi # sched: [1:0.50] -; BTVER2-NEXT: shrl %edi # sched: [1:0.50] -; BTVER2-NEXT: sarl (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shll (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shrl (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: sarl $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: shll $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: shrl $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: sarl $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shll $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shrl $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: sarl %cl, %edi # sched: [1:0.50] -; BTVER2-NEXT: shll %cl, %edi # sched: [1:0.50] -; BTVER2-NEXT: shrl %cl, %edi # sched: [1:0.50] -; BTVER2-NEXT: sarl %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shll %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shrl %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_sar_shl_shr_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: sarl %edi # sched: [1:0.25] -; ZNVER1-NEXT: shll %edi # sched: [1:0.25] -; ZNVER1-NEXT: shrl %edi # sched: [1:0.25] -; ZNVER1-NEXT: sarl (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shll (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shrl (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: sarl $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: shll $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: shrl $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: sarl $7, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shll $7, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shrl $7, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: sarl %cl, %edi # sched: [1:0.25] -; ZNVER1-NEXT: shll %cl, %edi # sched: [1:0.25] -; ZNVER1-NEXT: shrl %cl, %edi # sched: [1:0.25] -; ZNVER1-NEXT: sarl %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: shll %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: shrl %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "sarl $0 \0A\09 shll $0 \0A\09 shrl $0 \0A\09 sarl $2 \0A\09 shll $2 \0A\09 shrl $2 \0A\09 sarl $3, $0 \0A\09 shll $3, $0 \0A\09 shrl $3, $0 \0A\09 sarl $3, $2 \0A\09 shll $3, $2 \0A\09 shrl $3, $2 \0A\09 sarl %CL, $0 \0A\09 shll %CL, $0 \0A\09 shrl %CL, $0 \0A\09 sarl %CL, $2 \0A\09 shll %CL, $2 \0A\09 shrl %CL, $2", "r,r,*m,i"(i32 %a0, i32 %a1, i32 *%a2, i8 7) - ret void -} -define void @test_sar_shl_shr_64(i64 %a0, i64 %a1, i64 *%a2) optsize { -; GENERIC-LABEL: test_sar_shl_shr_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: sarq %rdi # sched: [1:0.50] -; GENERIC-NEXT: shlq %rdi # sched: [1:0.50] -; GENERIC-NEXT: shrq %rdi # sched: [1:0.50] -; GENERIC-NEXT: sarq (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shlq (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shrq (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: sarq $7, %rdi # sched: [1:0.50] -; GENERIC-NEXT: shlq $7, %rdi # sched: [1:0.50] -; GENERIC-NEXT: shrq $7, %rdi # sched: [1:0.50] -; GENERIC-NEXT: sarq $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shlq $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: shrq $7, (%rdx) # sched: [7:1.00] -; GENERIC-NEXT: sarq %cl, %rdi # sched: [3:1.50] -; GENERIC-NEXT: shlq %cl, %rdi # sched: [3:1.50] -; GENERIC-NEXT: shrq %cl, %rdi # sched: [3:1.50] -; GENERIC-NEXT: sarq %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: shlq %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: shrq %cl, (%rdx) # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sar_shl_shr_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: sarq %rdi # sched: [1:1.00] -; ATOM-NEXT: shlq %rdi # sched: [1:1.00] -; ATOM-NEXT: shrq %rdi # sched: [1:1.00] -; ATOM-NEXT: sarq (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shlq (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shrq (%rdx) # sched: [1:1.00] -; ATOM-NEXT: sarq $7, %rdi # sched: [1:1.00] -; ATOM-NEXT: shlq $7, %rdi # sched: [1:1.00] -; ATOM-NEXT: shrq $7, %rdi # sched: [1:1.00] -; ATOM-NEXT: sarq $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shlq $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shrq $7, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: sarq %cl, %rdi # sched: [1:1.00] -; ATOM-NEXT: shlq %cl, %rdi # sched: [1:1.00] -; ATOM-NEXT: shrq %cl, %rdi # sched: [1:1.00] -; ATOM-NEXT: sarq %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shlq %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: shrq %cl, (%rdx) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sar_shl_shr_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: sarq %rdi # sched: [1:1.00] -; SLM-NEXT: shlq %rdi # sched: [1:1.00] -; SLM-NEXT: shrq %rdi # sched: [1:1.00] -; SLM-NEXT: sarq (%rdx) # sched: [4:2.00] -; SLM-NEXT: shlq (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrq (%rdx) # sched: [4:2.00] -; SLM-NEXT: sarq $7, %rdi # sched: [1:1.00] -; SLM-NEXT: shlq $7, %rdi # sched: [1:1.00] -; SLM-NEXT: shrq $7, %rdi # sched: [1:1.00] -; SLM-NEXT: sarq $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shlq $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrq $7, (%rdx) # sched: [4:2.00] -; SLM-NEXT: sarq %cl, %rdi # sched: [1:1.00] -; SLM-NEXT: shlq %cl, %rdi # sched: [1:1.00] -; SLM-NEXT: shrq %cl, %rdi # sched: [1:1.00] -; SLM-NEXT: sarq %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shlq %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrq %cl, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_sar_shl_shr_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: sarq %rdi # sched: [1:0.50] -; SANDY-NEXT: shlq %rdi # sched: [1:0.50] -; SANDY-NEXT: shrq %rdi # sched: [1:0.50] -; SANDY-NEXT: sarq (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shlq (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shrq (%rdx) # sched: [7:1.00] -; SANDY-NEXT: sarq $7, %rdi # sched: [1:0.50] -; SANDY-NEXT: shlq $7, %rdi # sched: [1:0.50] -; SANDY-NEXT: shrq $7, %rdi # sched: [1:0.50] -; SANDY-NEXT: sarq $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shlq $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: shrq $7, (%rdx) # sched: [7:1.00] -; SANDY-NEXT: sarq %cl, %rdi # sched: [3:1.50] -; SANDY-NEXT: shlq %cl, %rdi # sched: [3:1.50] -; SANDY-NEXT: shrq %cl, %rdi # sched: [3:1.50] -; SANDY-NEXT: sarq %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: shlq %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: shrq %cl, (%rdx) # sched: [9:1.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sar_shl_shr_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: sarq %rdi # sched: [1:0.50] -; HASWELL-NEXT: shlq %rdi # sched: [1:0.50] -; HASWELL-NEXT: shrq %rdi # sched: [1:0.50] -; HASWELL-NEXT: sarq (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shlq (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shrq (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: sarq $7, %rdi # sched: [1:0.50] -; HASWELL-NEXT: shlq $7, %rdi # sched: [1:0.50] -; HASWELL-NEXT: shrq $7, %rdi # sched: [1:0.50] -; HASWELL-NEXT: sarq $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shlq $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: shrq $7, (%rdx) # sched: [7:1.00] -; HASWELL-NEXT: sarq %cl, %rdi # sched: [3:1.00] -; HASWELL-NEXT: shlq %cl, %rdi # sched: [3:1.00] -; HASWELL-NEXT: shrq %cl, %rdi # sched: [3:1.00] -; HASWELL-NEXT: sarq %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: shlq %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: shrq %cl, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sar_shl_shr_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: sarq %rdi # sched: [1:0.50] -; BROADWELL-NEXT: shlq %rdi # sched: [1:0.50] -; BROADWELL-NEXT: shrq %rdi # sched: [1:0.50] -; BROADWELL-NEXT: sarq (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shlq (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shrq (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: sarq $7, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: shlq $7, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: shrq $7, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: sarq $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shlq $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: shrq $7, (%rdx) # sched: [6:1.00] -; BROADWELL-NEXT: sarq %cl, %rdi # sched: [3:1.00] -; BROADWELL-NEXT: shlq %cl, %rdi # sched: [3:1.00] -; BROADWELL-NEXT: shrq %cl, %rdi # sched: [3:1.00] -; BROADWELL-NEXT: sarq %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: shlq %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: shrq %cl, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sar_shl_shr_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: sarq %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: shlq %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: shrq %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: sarq (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shlq (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shrq (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: sarq $7, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: shlq $7, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: shrq $7, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: sarq $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shlq $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: shrq $7, (%rdx) # sched: [6:1.00] -; SKYLAKE-NEXT: sarq %cl, %rdi # sched: [3:1.50] -; SKYLAKE-NEXT: shlq %cl, %rdi # sched: [3:1.50] -; SKYLAKE-NEXT: shrq %cl, %rdi # sched: [3:1.50] -; SKYLAKE-NEXT: sarq %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: shlq %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: shrq %cl, (%rdx) # sched: [8:1.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sar_shl_shr_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: sarq %rdi # sched: [1:0.50] -; SKX-NEXT: shlq %rdi # sched: [1:0.50] -; SKX-NEXT: shrq %rdi # sched: [1:0.50] -; SKX-NEXT: sarq (%rdx) # sched: [6:1.00] -; SKX-NEXT: shlq (%rdx) # sched: [6:1.00] -; SKX-NEXT: shrq (%rdx) # sched: [6:1.00] -; SKX-NEXT: sarq $7, %rdi # sched: [1:0.50] -; SKX-NEXT: shlq $7, %rdi # sched: [1:0.50] -; SKX-NEXT: shrq $7, %rdi # sched: [1:0.50] -; SKX-NEXT: sarq $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: shlq $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: shrq $7, (%rdx) # sched: [6:1.00] -; SKX-NEXT: sarq %cl, %rdi # sched: [3:1.50] -; SKX-NEXT: shlq %cl, %rdi # sched: [3:1.50] -; SKX-NEXT: shrq %cl, %rdi # sched: [3:1.50] -; SKX-NEXT: sarq %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: shlq %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: shrq %cl, (%rdx) # sched: [8:1.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_sar_shl_shr_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: sarq %rdi # sched: [1:0.50] -; BDVER2-NEXT: shlq %rdi # sched: [1:0.50] -; BDVER2-NEXT: shrq %rdi # sched: [1:0.50] -; BDVER2-NEXT: sarq (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shlq (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shrq (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: sarq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: shlq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: shrq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: sarq $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shlq $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shrq $7, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: sarq %cl, %rdi # sched: [1:0.50] -; BDVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50] -; BDVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] -; BDVER2-NEXT: sarq %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shlq %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: shrq %cl, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_sar_shl_shr_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: sarq %rdi # sched: [1:0.50] -; BTVER2-NEXT: shlq %rdi # sched: [1:0.50] -; BTVER2-NEXT: shrq %rdi # sched: [1:0.50] -; BTVER2-NEXT: sarq (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shlq (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shrq (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: sarq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: shlq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: shrq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: sarq $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shlq $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shrq $7, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: sarq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: sarq %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shlq %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: shrq %cl, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_sar_shl_shr_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: sarq %rdi # sched: [1:0.25] -; ZNVER1-NEXT: shlq %rdi # sched: [1:0.25] -; ZNVER1-NEXT: shrq %rdi # sched: [1:0.25] -; ZNVER1-NEXT: sarq (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shlq (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shrq (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: sarq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: shlq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: shrq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: sarq $7, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shlq $7, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shrq $7, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: sarq %cl, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: shlq %cl, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: shrq %cl, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: sarq %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: shlq %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: shrq %cl, (%rdx) # sched: [5:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "sarq $0 \0A\09 shlq $0 \0A\09 shrq $0 \0A\09 sarq $2 \0A\09 shlq $2 \0A\09 shrq $2 \0A\09 sarq $3, $0 \0A\09 shlq $3, $0 \0A\09 shrq $3, $0 \0A\09 sarq $3, $2 \0A\09 shlq $3, $2 \0A\09 shrq $3, $2 \0A\09 sarq %CL, $0 \0A\09 shlq %CL, $0 \0A\09 shrq %CL, $0 \0A\09 sarq %CL, $2 \0A\09 shlq %CL, $2 \0A\09 shrq %CL, $2", "r,r,*m,i"(i64 %a0, i64 %a1, i64 *%a2, i8 7) - ret void -} - -define void @test_sbb_8(i8 %a0, i8* %a1, i8 %a2) optsize { -; GENERIC-LABEL: test_sbb_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: sbbb $7, %al # sched: [2:0.67] -; GENERIC-NEXT: sbbb $7, %dil # sched: [2:0.67] -; GENERIC-NEXT: sbbb $7, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: sbbb %dl, %dil # sched: [2:0.67] -; GENERIC-NEXT: sbbb %dil, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: sbbb (%rsi), %dil # sched: [7:0.67] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sbb_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: sbbb $7, %al # sched: [1:0.50] -; ATOM-NEXT: sbbb $7, %dil # sched: [1:0.50] -; ATOM-NEXT: sbbb $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: sbbb %dl, %dil # sched: [1:0.50] -; ATOM-NEXT: sbbb %dil, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: sbbb (%rsi), %dil # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sbb_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: sbbb $7, %al # sched: [1:0.50] -; SLM-NEXT: sbbb $7, %dil # sched: [1:0.50] -; SLM-NEXT: sbbb $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: sbbb %dl, %dil # sched: [1:0.50] -; SLM-NEXT: sbbb %dil, (%rsi) # sched: [5:2.00] -; SLM-NEXT: sbbb (%rsi), %dil # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_sbb_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: sbbb $7, %al # sched: [2:0.67] -; SANDY-NEXT: sbbb $7, %dil # sched: [2:0.67] -; SANDY-NEXT: sbbb $7, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: sbbb %dl, %dil # sched: [2:0.67] -; SANDY-NEXT: sbbb %dil, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: sbbb (%rsi), %dil # sched: [7:0.67] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sbb_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: sbbb $7, %al # sched: [2:0.50] -; HASWELL-NEXT: sbbb $7, %dil # sched: [2:0.50] -; HASWELL-NEXT: sbbb $7, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: sbbb %dl, %dil # sched: [2:0.50] -; HASWELL-NEXT: sbbb %dil, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: sbbb (%rsi), %dil # sched: [7:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sbb_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: sbbb $7, %al # sched: [2:0.50] -; BROADWELL-NEXT: sbbb $7, %dil # sched: [2:0.50] -; BROADWELL-NEXT: sbbb $7, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: sbbb %dl, %dil # sched: [1:0.50] -; BROADWELL-NEXT: sbbb %dil, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: sbbb (%rsi), %dil # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sbb_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: sbbb $7, %al # sched: [2:0.50] -; SKYLAKE-NEXT: sbbb $7, %dil # sched: [2:0.50] -; SKYLAKE-NEXT: sbbb $7, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: sbbb %dl, %dil # sched: [1:0.50] -; SKYLAKE-NEXT: sbbb %dil, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: sbbb (%rsi), %dil # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sbb_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: sbbb $7, %al # sched: [2:0.50] -; SKX-NEXT: sbbb $7, %dil # sched: [2:0.50] -; SKX-NEXT: sbbb $7, (%rsi) # sched: [8:1.00] -; SKX-NEXT: sbbb %dl, %dil # sched: [1:0.50] -; SKX-NEXT: sbbb %dil, (%rsi) # sched: [8:1.00] -; SKX-NEXT: sbbb (%rsi), %dil # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_sbb_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: sbbb $7, %al # sched: [1:1.00] -; BDVER2-NEXT: sbbb $7, %dil # sched: [1:1.00] -; BDVER2-NEXT: sbbb $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: sbbb %dl, %dil # sched: [1:1.00] -; BDVER2-NEXT: sbbb %dil, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: sbbb (%rsi), %dil # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_sbb_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: sbbb $7, %al # sched: [1:1.00] -; BTVER2-NEXT: sbbb $7, %dil # sched: [1:1.00] -; BTVER2-NEXT: sbbb $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: sbbb %dl, %dil # sched: [1:1.00] -; BTVER2-NEXT: sbbb %dil, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: sbbb (%rsi), %dil # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_sbb_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: sbbb $7, %al # sched: [1:0.25] -; ZNVER1-NEXT: sbbb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: sbbb $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: sbbb %dl, %dil # sched: [1:0.25] -; ZNVER1-NEXT: sbbb %dil, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: sbbb (%rsi), %dil # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "sbbb $3, %AL \0A\09 sbbb $3, $0 \0A\09 sbbb $3, $2 \0A\09 sbbb $1, $0 \0A\09 sbbb $0, $2 \0A\09 sbbb $2, $0", "r,r,*m,i"(i8 %a0, i8 %a2, i8* %a1, i8 7) nounwind - ret void -} -define void @test_sbb_16(i16 %a0, i16* %a1, i16 %a2) optsize { -; GENERIC-LABEL: test_sbb_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: sbbw $511, %ax # imm = 0x1FF -; GENERIC-NEXT: # sched: [2:0.67] -; GENERIC-NEXT: sbbw $511, %di # imm = 0x1FF -; GENERIC-NEXT: # sched: [2:0.67] -; GENERIC-NEXT: sbbw $511, (%rsi) # imm = 0x1FF -; GENERIC-NEXT: # sched: [9:1.00] -; GENERIC-NEXT: sbbw $7, %di # sched: [2:0.67] -; GENERIC-NEXT: sbbw $7, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: sbbw %dx, %di # sched: [2:0.67] -; GENERIC-NEXT: sbbw %di, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: sbbw (%rsi), %di # sched: [7:0.67] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sbb_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: sbbw $511, %ax # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: sbbw $511, %di # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: sbbw $511, (%rsi) # imm = 0x1FF -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: sbbw $7, %di # sched: [1:0.50] -; ATOM-NEXT: sbbw $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: sbbw %dx, %di # sched: [1:0.50] -; ATOM-NEXT: sbbw %di, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: sbbw (%rsi), %di # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sbb_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: sbbw $511, %ax # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: sbbw $511, %di # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: sbbw $511, (%rsi) # imm = 0x1FF -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: sbbw $7, %di # sched: [1:0.50] -; SLM-NEXT: sbbw $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: sbbw %dx, %di # sched: [1:0.50] -; SLM-NEXT: sbbw %di, (%rsi) # sched: [5:2.00] -; SLM-NEXT: sbbw (%rsi), %di # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_sbb_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: sbbw $511, %ax # imm = 0x1FF -; SANDY-NEXT: # sched: [2:0.67] -; SANDY-NEXT: sbbw $511, %di # imm = 0x1FF -; SANDY-NEXT: # sched: [2:0.67] -; SANDY-NEXT: sbbw $511, (%rsi) # imm = 0x1FF -; SANDY-NEXT: # sched: [9:1.00] -; SANDY-NEXT: sbbw $7, %di # sched: [2:0.67] -; SANDY-NEXT: sbbw $7, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: sbbw %dx, %di # sched: [2:0.67] -; SANDY-NEXT: sbbw %di, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: sbbw (%rsi), %di # sched: [7:0.67] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sbb_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: sbbw $511, %ax # imm = 0x1FF -; HASWELL-NEXT: # sched: [2:0.50] -; HASWELL-NEXT: sbbw $511, %di # imm = 0x1FF -; HASWELL-NEXT: # sched: [2:0.50] -; HASWELL-NEXT: sbbw $511, (%rsi) # imm = 0x1FF -; HASWELL-NEXT: # sched: [9:1.00] -; HASWELL-NEXT: sbbw $7, %di # sched: [2:0.50] -; HASWELL-NEXT: sbbw $7, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: sbbw %dx, %di # sched: [2:0.50] -; HASWELL-NEXT: sbbw %di, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: sbbw (%rsi), %di # sched: [7:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sbb_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: sbbw $511, %ax # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.50] -; BROADWELL-NEXT: sbbw $511, %di # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.50] -; BROADWELL-NEXT: sbbw $511, (%rsi) # imm = 0x1FF -; BROADWELL-NEXT: # sched: [8:1.00] -; BROADWELL-NEXT: sbbw $7, %di # sched: [1:0.50] -; BROADWELL-NEXT: sbbw $7, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: sbbw %dx, %di # sched: [1:0.50] -; BROADWELL-NEXT: sbbw %di, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: sbbw (%rsi), %di # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sbb_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: sbbw $511, %ax # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.50] -; SKYLAKE-NEXT: sbbw $511, %di # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.50] -; SKYLAKE-NEXT: sbbw $511, (%rsi) # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [8:1.00] -; SKYLAKE-NEXT: sbbw $7, %di # sched: [1:0.50] -; SKYLAKE-NEXT: sbbw $7, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: sbbw %dx, %di # sched: [1:0.50] -; SKYLAKE-NEXT: sbbw %di, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: sbbw (%rsi), %di # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sbb_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: sbbw $511, %ax # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.50] -; SKX-NEXT: sbbw $511, %di # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.50] -; SKX-NEXT: sbbw $511, (%rsi) # imm = 0x1FF -; SKX-NEXT: # sched: [8:1.00] -; SKX-NEXT: sbbw $7, %di # sched: [1:0.50] -; SKX-NEXT: sbbw $7, (%rsi) # sched: [8:1.00] -; SKX-NEXT: sbbw %dx, %di # sched: [1:0.50] -; SKX-NEXT: sbbw %di, (%rsi) # sched: [8:1.00] -; SKX-NEXT: sbbw (%rsi), %di # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_sbb_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: sbbw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: sbbw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: sbbw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: sbbw $7, %di # sched: [1:1.00] -; BDVER2-NEXT: sbbw $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: sbbw %dx, %di # sched: [1:1.00] -; BDVER2-NEXT: sbbw %di, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: sbbw (%rsi), %di # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_sbb_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: sbbw $511, %ax # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: sbbw $511, %di # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: sbbw $511, (%rsi) # imm = 0x1FF -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: sbbw $7, %di # sched: [1:1.00] -; BTVER2-NEXT: sbbw $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: sbbw %dx, %di # sched: [1:1.00] -; BTVER2-NEXT: sbbw %di, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: sbbw (%rsi), %di # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_sbb_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: sbbw $511, %ax # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: sbbw $511, %di # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: sbbw $511, (%rsi) # imm = 0x1FF -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: sbbw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: sbbw $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: sbbw %dx, %di # sched: [1:0.25] -; ZNVER1-NEXT: sbbw %di, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: sbbw (%rsi), %di # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "sbbw $3, %AX \0A\09 sbbw $3, $0 \0A\09 sbbw $3, $2 \0A\09 sbbw $4, $0 \0A\09 sbbw $4, $2 \0A\09 sbbw $1, $0 \0A\09 sbbw $0, $2 \0A\09 sbbw $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind - ret void -} -define void @test_sbb_32(i32 %a0, i32* %a1, i32 %a2) optsize { -; GENERIC-LABEL: test_sbb_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: sbbl $665536, %eax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [2:0.67] -; GENERIC-NEXT: sbbl $665536, %edi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [2:0.67] -; GENERIC-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [9:1.00] -; GENERIC-NEXT: sbbl $7, %edi # sched: [2:0.67] -; GENERIC-NEXT: sbbl $7, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: sbbl %edx, %edi # sched: [2:0.67] -; GENERIC-NEXT: sbbl %edi, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: sbbl (%rsi), %edi # sched: [7:0.67] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sbb_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: sbbl $665536, %eax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: sbbl $665536, %edi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: sbbl $7, %edi # sched: [1:0.50] -; ATOM-NEXT: sbbl $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: sbbl %edx, %edi # sched: [1:0.50] -; ATOM-NEXT: sbbl %edi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: sbbl (%rsi), %edi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sbb_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: sbbl $665536, %eax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: sbbl $665536, %edi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: sbbl $7, %edi # sched: [1:0.50] -; SLM-NEXT: sbbl $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: sbbl %edx, %edi # sched: [1:0.50] -; SLM-NEXT: sbbl %edi, (%rsi) # sched: [5:2.00] -; SLM-NEXT: sbbl (%rsi), %edi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_sbb_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: sbbl $665536, %eax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [2:0.67] -; SANDY-NEXT: sbbl $665536, %edi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [2:0.67] -; SANDY-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [9:1.00] -; SANDY-NEXT: sbbl $7, %edi # sched: [2:0.67] -; SANDY-NEXT: sbbl $7, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: sbbl %edx, %edi # sched: [2:0.67] -; SANDY-NEXT: sbbl %edi, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: sbbl (%rsi), %edi # sched: [7:0.67] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sbb_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: sbbl $665536, %eax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [2:0.50] -; HASWELL-NEXT: sbbl $665536, %edi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [2:0.50] -; HASWELL-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [9:1.00] -; HASWELL-NEXT: sbbl $7, %edi # sched: [2:0.50] -; HASWELL-NEXT: sbbl $7, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: sbbl %edx, %edi # sched: [2:0.50] -; HASWELL-NEXT: sbbl %edi, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: sbbl (%rsi), %edi # sched: [7:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sbb_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: sbbl $665536, %eax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.50] -; BROADWELL-NEXT: sbbl $665536, %edi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.50] -; BROADWELL-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [8:1.00] -; BROADWELL-NEXT: sbbl $7, %edi # sched: [1:0.50] -; BROADWELL-NEXT: sbbl $7, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: sbbl %edx, %edi # sched: [1:0.50] -; BROADWELL-NEXT: sbbl %edi, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: sbbl (%rsi), %edi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sbb_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: sbbl $665536, %eax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.50] -; SKYLAKE-NEXT: sbbl $665536, %edi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.50] -; SKYLAKE-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [8:1.00] -; SKYLAKE-NEXT: sbbl $7, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: sbbl $7, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: sbbl %edx, %edi # sched: [1:0.50] -; SKYLAKE-NEXT: sbbl %edi, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: sbbl (%rsi), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sbb_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: sbbl $665536, %eax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.50] -; SKX-NEXT: sbbl $665536, %edi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.50] -; SKX-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [8:1.00] -; SKX-NEXT: sbbl $7, %edi # sched: [1:0.50] -; SKX-NEXT: sbbl $7, (%rsi) # sched: [8:1.00] -; SKX-NEXT: sbbl %edx, %edi # sched: [1:0.50] -; SKX-NEXT: sbbl %edi, (%rsi) # sched: [8:1.00] -; SKX-NEXT: sbbl (%rsi), %edi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_sbb_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: sbbl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: sbbl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: sbbl $7, %edi # sched: [1:1.00] -; BDVER2-NEXT: sbbl $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: sbbl %edx, %edi # sched: [1:1.00] -; BDVER2-NEXT: sbbl %edi, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: sbbl (%rsi), %edi # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_sbb_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: sbbl $665536, %eax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: sbbl $665536, %edi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: sbbl $7, %edi # sched: [1:1.00] -; BTVER2-NEXT: sbbl $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: sbbl %edx, %edi # sched: [1:1.00] -; BTVER2-NEXT: sbbl %edi, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: sbbl (%rsi), %edi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_sbb_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: sbbl $665536, %eax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: sbbl $665536, %edi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: sbbl $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: sbbl $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: sbbl %edx, %edi # sched: [1:0.25] -; ZNVER1-NEXT: sbbl %edi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: sbbl (%rsi), %edi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "sbbl $3, %EAX \0A\09 sbbl $3, $0 \0A\09 sbbl $3, $2 \0A\09 sbbl $4, $0 \0A\09 sbbl $4, $2 \0A\09 sbbl $1, $0 \0A\09 sbbl $0, $2 \0A\09 sbbl $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind - ret void -} -define void @test_sbb_64(i64 %a0, i64* %a1, i64 %a2) optsize { -; GENERIC-LABEL: test_sbb_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: sbbq $665536, %rax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [2:0.67] -; GENERIC-NEXT: sbbq $665536, %rdi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [2:0.67] -; GENERIC-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [9:1.00] -; GENERIC-NEXT: sbbq $7, %rdi # sched: [2:0.67] -; GENERIC-NEXT: sbbq $7, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: sbbq %rdx, %rdi # sched: [2:0.67] -; GENERIC-NEXT: sbbq %rdi, (%rsi) # sched: [9:1.00] -; GENERIC-NEXT: sbbq (%rsi), %rdi # sched: [7:0.67] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sbb_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: sbbq $665536, %rax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: sbbq $665536, %rdi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: sbbq $7, %rdi # sched: [1:0.50] -; ATOM-NEXT: sbbq $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: sbbq %rdx, %rdi # sched: [1:0.50] -; ATOM-NEXT: sbbq %rdi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: sbbq (%rsi), %rdi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sbb_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: sbbq $665536, %rax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: sbbq $665536, %rdi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: sbbq $7, %rdi # sched: [1:0.50] -; SLM-NEXT: sbbq $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: sbbq %rdx, %rdi # sched: [1:0.50] -; SLM-NEXT: sbbq %rdi, (%rsi) # sched: [5:2.00] -; SLM-NEXT: sbbq (%rsi), %rdi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_sbb_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: sbbq $665536, %rax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [2:0.67] -; SANDY-NEXT: sbbq $665536, %rdi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [2:0.67] -; SANDY-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [9:1.00] -; SANDY-NEXT: sbbq $7, %rdi # sched: [2:0.67] -; SANDY-NEXT: sbbq $7, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: sbbq %rdx, %rdi # sched: [2:0.67] -; SANDY-NEXT: sbbq %rdi, (%rsi) # sched: [9:1.00] -; SANDY-NEXT: sbbq (%rsi), %rdi # sched: [7:0.67] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sbb_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: sbbq $665536, %rax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [2:0.50] -; HASWELL-NEXT: sbbq $665536, %rdi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [2:0.50] -; HASWELL-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [9:1.00] -; HASWELL-NEXT: sbbq $7, %rdi # sched: [2:0.50] -; HASWELL-NEXT: sbbq $7, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: sbbq %rdx, %rdi # sched: [2:0.50] -; HASWELL-NEXT: sbbq %rdi, (%rsi) # sched: [9:1.00] -; HASWELL-NEXT: sbbq (%rsi), %rdi # sched: [7:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sbb_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: sbbq $665536, %rax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.50] -; BROADWELL-NEXT: sbbq $665536, %rdi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.50] -; BROADWELL-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [8:1.00] -; BROADWELL-NEXT: sbbq $7, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: sbbq $7, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: sbbq %rdx, %rdi # sched: [1:0.50] -; BROADWELL-NEXT: sbbq %rdi, (%rsi) # sched: [8:1.00] -; BROADWELL-NEXT: sbbq (%rsi), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sbb_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: sbbq $665536, %rax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.50] -; SKYLAKE-NEXT: sbbq $665536, %rdi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.50] -; SKYLAKE-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [8:1.00] -; SKYLAKE-NEXT: sbbq $7, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: sbbq $7, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: sbbq %rdx, %rdi # sched: [1:0.50] -; SKYLAKE-NEXT: sbbq %rdi, (%rsi) # sched: [8:1.00] -; SKYLAKE-NEXT: sbbq (%rsi), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sbb_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: sbbq $665536, %rax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.50] -; SKX-NEXT: sbbq $665536, %rdi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.50] -; SKX-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [8:1.00] -; SKX-NEXT: sbbq $7, %rdi # sched: [1:0.50] -; SKX-NEXT: sbbq $7, (%rsi) # sched: [8:1.00] -; SKX-NEXT: sbbq %rdx, %rdi # sched: [1:0.50] -; SKX-NEXT: sbbq %rdi, (%rsi) # sched: [8:1.00] -; SKX-NEXT: sbbq (%rsi), %rdi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_sbb_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: sbbq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: sbbq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: sbbq $7, %rdi # sched: [1:1.00] -; BDVER2-NEXT: sbbq $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: sbbq %rdx, %rdi # sched: [1:1.00] -; BDVER2-NEXT: sbbq %rdi, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: sbbq (%rsi), %rdi # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_sbb_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: sbbq $665536, %rax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: sbbq $665536, %rdi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:1.00] -; BTVER2-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: sbbq $7, %rdi # sched: [1:1.00] -; BTVER2-NEXT: sbbq $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: sbbq %rdx, %rdi # sched: [1:1.00] -; BTVER2-NEXT: sbbq %rdi, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: sbbq (%rsi), %rdi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_sbb_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: sbbq $665536, %rax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: sbbq $665536, %rdi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: sbbq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: sbbq $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: sbbq %rdx, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: sbbq %rdi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: sbbq (%rsi), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "sbbq $3, %RAX \0A\09 sbbq $3, $0 \0A\09 sbbq $3, $2 \0A\09 sbbq $4, $0 \0A\09 sbbq $4, $2 \0A\09 sbbq $1, $0 \0A\09 sbbq $0, $2 \0A\09 sbbq $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind - ret void -} - -define void @test_scas() optsize { -; GENERIC-LABEL: test_scas: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: scasb %es:(%rdi), %al # sched: [2:0.67] -; GENERIC-NEXT: scasw %es:(%rdi), %ax # sched: [2:0.67] -; GENERIC-NEXT: scasl %es:(%rdi), %eax # sched: [2:0.67] -; GENERIC-NEXT: scasq %es:(%rdi), %rax # sched: [2:0.67] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_scas: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: scasb %es:(%rdi), %al # sched: [2:1.00] -; ATOM-NEXT: scasw %es:(%rdi), %ax # sched: [2:1.00] -; ATOM-NEXT: scasl %es:(%rdi), %eax # sched: [2:1.00] -; ATOM-NEXT: scasq %es:(%rdi), %rax # sched: [2:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_scas: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: scasb %es:(%rdi), %al # sched: [100:1.00] -; SLM-NEXT: scasw %es:(%rdi), %ax # sched: [100:1.00] -; SLM-NEXT: scasl %es:(%rdi), %eax # sched: [100:1.00] -; SLM-NEXT: scasq %es:(%rdi), %rax # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_scas: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: scasb %es:(%rdi), %al # sched: [2:0.67] -; SANDY-NEXT: scasw %es:(%rdi), %ax # sched: [2:0.67] -; SANDY-NEXT: scasl %es:(%rdi), %eax # sched: [2:0.67] -; SANDY-NEXT: scasq %es:(%rdi), %rax # sched: [2:0.67] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_scas: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: scasb %es:(%rdi), %al # sched: [7:0.50] -; HASWELL-NEXT: scasw %es:(%rdi), %ax # sched: [7:0.50] -; HASWELL-NEXT: scasl %es:(%rdi), %eax # sched: [7:0.50] -; HASWELL-NEXT: scasq %es:(%rdi), %rax # sched: [7:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_scas: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: scasb %es:(%rdi), %al # sched: [7:0.50] -; BROADWELL-NEXT: scasw %es:(%rdi), %ax # sched: [7:0.50] -; BROADWELL-NEXT: scasl %es:(%rdi), %eax # sched: [7:0.50] -; BROADWELL-NEXT: scasq %es:(%rdi), %rax # sched: [7:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_scas: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: scasb %es:(%rdi), %al # sched: [7:0.50] -; SKYLAKE-NEXT: scasw %es:(%rdi), %ax # sched: [7:0.50] -; SKYLAKE-NEXT: scasl %es:(%rdi), %eax # sched: [7:0.50] -; SKYLAKE-NEXT: scasq %es:(%rdi), %rax # sched: [7:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_scas: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: scasb %es:(%rdi), %al # sched: [7:0.50] -; SKX-NEXT: scasw %es:(%rdi), %ax # sched: [7:0.50] -; SKX-NEXT: scasl %es:(%rdi), %eax # sched: [7:0.50] -; SKX-NEXT: scasq %es:(%rdi), %rax # sched: [7:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_scas: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: scasb %es:(%rdi), %al # sched: [100:0.50] -; BDVER2-NEXT: scasw %es:(%rdi), %ax # sched: [100:0.50] -; BDVER2-NEXT: scasl %es:(%rdi), %eax # sched: [100:0.50] -; BDVER2-NEXT: scasq %es:(%rdi), %rax # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_scas: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: scasb %es:(%rdi), %al # sched: [100:0.50] -; BTVER2-NEXT: scasw %es:(%rdi), %ax # sched: [100:0.50] -; BTVER2-NEXT: scasl %es:(%rdi), %eax # sched: [100:0.50] -; BTVER2-NEXT: scasq %es:(%rdi), %rax # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_scas: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: scasb %es:(%rdi), %al # sched: [100:0.25] -; ZNVER1-NEXT: scasw %es:(%rdi), %ax # sched: [100:0.25] -; ZNVER1-NEXT: scasl %es:(%rdi), %eax # sched: [100:0.25] -; ZNVER1-NEXT: scasq %es:(%rdi), %rax # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "scasb \0A\09 scasw \0A\09 scasl \0A\09 scasq", ""() - ret void -} - -define void @test_setcc(i8 %a0, i8 *%a1) optsize { -; GENERIC-LABEL: test_setcc: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: seto %dil # sched: [1:0.50] -; GENERIC-NEXT: setno %dil # sched: [1:0.50] -; GENERIC-NEXT: setb %dil # sched: [1:0.50] -; GENERIC-NEXT: setae %dil # sched: [1:0.50] -; GENERIC-NEXT: sete %dil # sched: [1:0.50] -; GENERIC-NEXT: setne %dil # sched: [1:0.50] -; GENERIC-NEXT: setbe %dil # sched: [2:1.00] -; GENERIC-NEXT: seta %dil # sched: [2:1.00] -; GENERIC-NEXT: sets %dil # sched: [1:0.50] -; GENERIC-NEXT: setns %dil # sched: [1:0.50] -; GENERIC-NEXT: setp %dil # sched: [1:0.50] -; GENERIC-NEXT: setnp %dil # sched: [1:0.50] -; GENERIC-NEXT: setl %dil # sched: [1:0.50] -; GENERIC-NEXT: setge %dil # sched: [1:0.50] -; GENERIC-NEXT: setle %dil # sched: [1:0.50] -; GENERIC-NEXT: setg %dil # sched: [1:0.50] -; GENERIC-NEXT: seto (%rsi) # sched: [2:1.00] -; GENERIC-NEXT: setno (%rsi) # sched: [2:1.00] -; GENERIC-NEXT: setb (%rsi) # sched: [2:1.00] -; GENERIC-NEXT: setae (%rsi) # sched: [2:1.00] -; GENERIC-NEXT: sete (%rsi) # sched: [2:1.00] -; GENERIC-NEXT: setne (%rsi) # sched: [2:1.00] -; GENERIC-NEXT: setbe (%rsi) # sched: [3:1.00] -; GENERIC-NEXT: seta (%rsi) # sched: [3:1.00] -; GENERIC-NEXT: sets (%rsi) # sched: [2:1.00] -; GENERIC-NEXT: setns (%rsi) # sched: [2:1.00] -; GENERIC-NEXT: setp (%rsi) # sched: [2:1.00] -; GENERIC-NEXT: setnp (%rsi) # sched: [2:1.00] -; GENERIC-NEXT: setl (%rsi) # sched: [2:1.00] -; GENERIC-NEXT: setge (%rsi) # sched: [2:1.00] -; GENERIC-NEXT: setle (%rsi) # sched: [2:1.00] -; GENERIC-NEXT: setg (%rsi) # sched: [2:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_setcc: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: seto %dil # sched: [1:0.50] -; ATOM-NEXT: setno %dil # sched: [1:0.50] -; ATOM-NEXT: setb %dil # sched: [1:0.50] -; ATOM-NEXT: setae %dil # sched: [1:0.50] -; ATOM-NEXT: sete %dil # sched: [1:0.50] -; ATOM-NEXT: setne %dil # sched: [1:0.50] -; ATOM-NEXT: setbe %dil # sched: [1:0.50] -; ATOM-NEXT: seta %dil # sched: [1:0.50] -; ATOM-NEXT: sets %dil # sched: [1:0.50] -; ATOM-NEXT: setns %dil # sched: [1:0.50] -; ATOM-NEXT: setp %dil # sched: [1:0.50] -; ATOM-NEXT: setnp %dil # sched: [1:0.50] -; ATOM-NEXT: setl %dil # sched: [1:0.50] -; ATOM-NEXT: setge %dil # sched: [1:0.50] -; ATOM-NEXT: setle %dil # sched: [1:0.50] -; ATOM-NEXT: setg %dil # sched: [1:0.50] -; ATOM-NEXT: seto (%rsi) # sched: [2:1.00] -; ATOM-NEXT: setno (%rsi) # sched: [2:1.00] -; ATOM-NEXT: setb (%rsi) # sched: [2:1.00] -; ATOM-NEXT: setae (%rsi) # sched: [2:1.00] -; ATOM-NEXT: sete (%rsi) # sched: [2:1.00] -; ATOM-NEXT: setne (%rsi) # sched: [2:1.00] -; ATOM-NEXT: setbe (%rsi) # sched: [2:1.00] -; ATOM-NEXT: seta (%rsi) # sched: [2:1.00] -; ATOM-NEXT: sets (%rsi) # sched: [2:1.00] -; ATOM-NEXT: setns (%rsi) # sched: [2:1.00] -; ATOM-NEXT: setp (%rsi) # sched: [2:1.00] -; ATOM-NEXT: setnp (%rsi) # sched: [2:1.00] -; ATOM-NEXT: setl (%rsi) # sched: [2:1.00] -; ATOM-NEXT: setge (%rsi) # sched: [2:1.00] -; ATOM-NEXT: setle (%rsi) # sched: [2:1.00] -; ATOM-NEXT: setg (%rsi) # sched: [2:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_setcc: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: seto %dil # sched: [1:0.50] -; SLM-NEXT: setno %dil # sched: [1:0.50] -; SLM-NEXT: setb %dil # sched: [1:0.50] -; SLM-NEXT: setae %dil # sched: [1:0.50] -; SLM-NEXT: sete %dil # sched: [1:0.50] -; SLM-NEXT: setne %dil # sched: [1:0.50] -; SLM-NEXT: setbe %dil # sched: [1:0.50] -; SLM-NEXT: seta %dil # sched: [1:0.50] -; SLM-NEXT: sets %dil # sched: [1:0.50] -; SLM-NEXT: setns %dil # sched: [1:0.50] -; SLM-NEXT: setp %dil # sched: [1:0.50] -; SLM-NEXT: setnp %dil # sched: [1:0.50] -; SLM-NEXT: setl %dil # sched: [1:0.50] -; SLM-NEXT: setge %dil # sched: [1:0.50] -; SLM-NEXT: setle %dil # sched: [1:0.50] -; SLM-NEXT: setg %dil # sched: [1:0.50] -; SLM-NEXT: seto (%rsi) # sched: [1:1.00] -; SLM-NEXT: setno (%rsi) # sched: [1:1.00] -; SLM-NEXT: setb (%rsi) # sched: [1:1.00] -; SLM-NEXT: setae (%rsi) # sched: [1:1.00] -; SLM-NEXT: sete (%rsi) # sched: [1:1.00] -; SLM-NEXT: setne (%rsi) # sched: [1:1.00] -; SLM-NEXT: setbe (%rsi) # sched: [1:1.00] -; SLM-NEXT: seta (%rsi) # sched: [1:1.00] -; SLM-NEXT: sets (%rsi) # sched: [1:1.00] -; SLM-NEXT: setns (%rsi) # sched: [1:1.00] -; SLM-NEXT: setp (%rsi) # sched: [1:1.00] -; SLM-NEXT: setnp (%rsi) # sched: [1:1.00] -; SLM-NEXT: setl (%rsi) # sched: [1:1.00] -; SLM-NEXT: setge (%rsi) # sched: [1:1.00] -; SLM-NEXT: setle (%rsi) # sched: [1:1.00] -; SLM-NEXT: setg (%rsi) # sched: [1:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_setcc: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: seto %dil # sched: [1:0.50] -; SANDY-NEXT: setno %dil # sched: [1:0.50] -; SANDY-NEXT: setb %dil # sched: [1:0.50] -; SANDY-NEXT: setae %dil # sched: [1:0.50] -; SANDY-NEXT: sete %dil # sched: [1:0.50] -; SANDY-NEXT: setne %dil # sched: [1:0.50] -; SANDY-NEXT: setbe %dil # sched: [2:1.00] -; SANDY-NEXT: seta %dil # sched: [2:1.00] -; SANDY-NEXT: sets %dil # sched: [1:0.50] -; SANDY-NEXT: setns %dil # sched: [1:0.50] -; SANDY-NEXT: setp %dil # sched: [1:0.50] -; SANDY-NEXT: setnp %dil # sched: [1:0.50] -; SANDY-NEXT: setl %dil # sched: [1:0.50] -; SANDY-NEXT: setge %dil # sched: [1:0.50] -; SANDY-NEXT: setle %dil # sched: [1:0.50] -; SANDY-NEXT: setg %dil # sched: [1:0.50] -; SANDY-NEXT: seto (%rsi) # sched: [2:1.00] -; SANDY-NEXT: setno (%rsi) # sched: [2:1.00] -; SANDY-NEXT: setb (%rsi) # sched: [2:1.00] -; SANDY-NEXT: setae (%rsi) # sched: [2:1.00] -; SANDY-NEXT: sete (%rsi) # sched: [2:1.00] -; SANDY-NEXT: setne (%rsi) # sched: [2:1.00] -; SANDY-NEXT: setbe (%rsi) # sched: [3:1.00] -; SANDY-NEXT: seta (%rsi) # sched: [3:1.00] -; SANDY-NEXT: sets (%rsi) # sched: [2:1.00] -; SANDY-NEXT: setns (%rsi) # sched: [2:1.00] -; SANDY-NEXT: setp (%rsi) # sched: [2:1.00] -; SANDY-NEXT: setnp (%rsi) # sched: [2:1.00] -; SANDY-NEXT: setl (%rsi) # sched: [2:1.00] -; SANDY-NEXT: setge (%rsi) # sched: [2:1.00] -; SANDY-NEXT: setle (%rsi) # sched: [2:1.00] -; SANDY-NEXT: setg (%rsi) # sched: [2:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_setcc: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: seto %dil # sched: [1:0.50] -; HASWELL-NEXT: setno %dil # sched: [1:0.50] -; HASWELL-NEXT: setb %dil # sched: [1:0.50] -; HASWELL-NEXT: setae %dil # sched: [1:0.50] -; HASWELL-NEXT: sete %dil # sched: [1:0.50] -; HASWELL-NEXT: setne %dil # sched: [1:0.50] -; HASWELL-NEXT: setbe %dil # sched: [2:0.50] -; HASWELL-NEXT: seta %dil # sched: [2:0.50] -; HASWELL-NEXT: sets %dil # sched: [1:0.50] -; HASWELL-NEXT: setns %dil # sched: [1:0.50] -; HASWELL-NEXT: setp %dil # sched: [1:0.50] -; HASWELL-NEXT: setnp %dil # sched: [1:0.50] -; HASWELL-NEXT: setl %dil # sched: [1:0.50] -; HASWELL-NEXT: setge %dil # sched: [1:0.50] -; HASWELL-NEXT: setle %dil # sched: [1:0.50] -; HASWELL-NEXT: setg %dil # sched: [1:0.50] -; HASWELL-NEXT: seto (%rsi) # sched: [2:1.00] -; HASWELL-NEXT: setno (%rsi) # sched: [2:1.00] -; HASWELL-NEXT: setb (%rsi) # sched: [2:1.00] -; HASWELL-NEXT: setae (%rsi) # sched: [2:1.00] -; HASWELL-NEXT: sete (%rsi) # sched: [2:1.00] -; HASWELL-NEXT: setne (%rsi) # sched: [2:1.00] -; HASWELL-NEXT: setbe (%rsi) # sched: [3:1.00] -; HASWELL-NEXT: seta (%rsi) # sched: [3:1.00] -; HASWELL-NEXT: sets (%rsi) # sched: [2:1.00] -; HASWELL-NEXT: setns (%rsi) # sched: [2:1.00] -; HASWELL-NEXT: setp (%rsi) # sched: [2:1.00] -; HASWELL-NEXT: setnp (%rsi) # sched: [2:1.00] -; HASWELL-NEXT: setl (%rsi) # sched: [2:1.00] -; HASWELL-NEXT: setge (%rsi) # sched: [2:1.00] -; HASWELL-NEXT: setle (%rsi) # sched: [2:1.00] -; HASWELL-NEXT: setg (%rsi) # sched: [2:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_setcc: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: seto %dil # sched: [1:0.50] -; BROADWELL-NEXT: setno %dil # sched: [1:0.50] -; BROADWELL-NEXT: setb %dil # sched: [1:0.50] -; BROADWELL-NEXT: setae %dil # sched: [1:0.50] -; BROADWELL-NEXT: sete %dil # sched: [1:0.50] -; BROADWELL-NEXT: setne %dil # sched: [1:0.50] -; BROADWELL-NEXT: setbe %dil # sched: [2:0.50] -; BROADWELL-NEXT: seta %dil # sched: [2:0.50] -; BROADWELL-NEXT: sets %dil # sched: [1:0.50] -; BROADWELL-NEXT: setns %dil # sched: [1:0.50] -; BROADWELL-NEXT: setp %dil # sched: [1:0.50] -; BROADWELL-NEXT: setnp %dil # sched: [1:0.50] -; BROADWELL-NEXT: setl %dil # sched: [1:0.50] -; BROADWELL-NEXT: setge %dil # sched: [1:0.50] -; BROADWELL-NEXT: setle %dil # sched: [1:0.50] -; BROADWELL-NEXT: setg %dil # sched: [1:0.50] -; BROADWELL-NEXT: seto (%rsi) # sched: [2:1.00] -; BROADWELL-NEXT: setno (%rsi) # sched: [2:1.00] -; BROADWELL-NEXT: setb (%rsi) # sched: [2:1.00] -; BROADWELL-NEXT: setae (%rsi) # sched: [2:1.00] -; BROADWELL-NEXT: sete (%rsi) # sched: [2:1.00] -; BROADWELL-NEXT: setne (%rsi) # sched: [2:1.00] -; BROADWELL-NEXT: setbe (%rsi) # sched: [3:1.00] -; BROADWELL-NEXT: seta (%rsi) # sched: [3:1.00] -; BROADWELL-NEXT: sets (%rsi) # sched: [2:1.00] -; BROADWELL-NEXT: setns (%rsi) # sched: [2:1.00] -; BROADWELL-NEXT: setp (%rsi) # sched: [2:1.00] -; BROADWELL-NEXT: setnp (%rsi) # sched: [2:1.00] -; BROADWELL-NEXT: setl (%rsi) # sched: [2:1.00] -; BROADWELL-NEXT: setge (%rsi) # sched: [2:1.00] -; BROADWELL-NEXT: setle (%rsi) # sched: [2:1.00] -; BROADWELL-NEXT: setg (%rsi) # sched: [2:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_setcc: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: seto %dil # sched: [1:0.50] -; SKYLAKE-NEXT: setno %dil # sched: [1:0.50] -; SKYLAKE-NEXT: setb %dil # sched: [1:0.50] -; SKYLAKE-NEXT: setae %dil # sched: [1:0.50] -; SKYLAKE-NEXT: sete %dil # sched: [1:0.50] -; SKYLAKE-NEXT: setne %dil # sched: [1:0.50] -; SKYLAKE-NEXT: setbe %dil # sched: [2:1.00] -; SKYLAKE-NEXT: seta %dil # sched: [2:1.00] -; SKYLAKE-NEXT: sets %dil # sched: [1:0.50] -; SKYLAKE-NEXT: setns %dil # sched: [1:0.50] -; SKYLAKE-NEXT: setp %dil # sched: [1:0.50] -; SKYLAKE-NEXT: setnp %dil # sched: [1:0.50] -; SKYLAKE-NEXT: setl %dil # sched: [1:0.50] -; SKYLAKE-NEXT: setge %dil # sched: [1:0.50] -; SKYLAKE-NEXT: setle %dil # sched: [1:0.50] -; SKYLAKE-NEXT: setg %dil # sched: [1:0.50] -; SKYLAKE-NEXT: seto (%rsi) # sched: [2:1.00] -; SKYLAKE-NEXT: setno (%rsi) # sched: [2:1.00] -; SKYLAKE-NEXT: setb (%rsi) # sched: [2:1.00] -; SKYLAKE-NEXT: setae (%rsi) # sched: [2:1.00] -; SKYLAKE-NEXT: sete (%rsi) # sched: [2:1.00] -; SKYLAKE-NEXT: setne (%rsi) # sched: [2:1.00] -; SKYLAKE-NEXT: setbe (%rsi) # sched: [3:1.00] -; SKYLAKE-NEXT: seta (%rsi) # sched: [3:1.00] -; SKYLAKE-NEXT: sets (%rsi) # sched: [2:1.00] -; SKYLAKE-NEXT: setns (%rsi) # sched: [2:1.00] -; SKYLAKE-NEXT: setp (%rsi) # sched: [2:1.00] -; SKYLAKE-NEXT: setnp (%rsi) # sched: [2:1.00] -; SKYLAKE-NEXT: setl (%rsi) # sched: [2:1.00] -; SKYLAKE-NEXT: setge (%rsi) # sched: [2:1.00] -; SKYLAKE-NEXT: setle (%rsi) # sched: [2:1.00] -; SKYLAKE-NEXT: setg (%rsi) # sched: [2:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_setcc: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: seto %dil # sched: [1:0.50] -; SKX-NEXT: setno %dil # sched: [1:0.50] -; SKX-NEXT: setb %dil # sched: [1:0.50] -; SKX-NEXT: setae %dil # sched: [1:0.50] -; SKX-NEXT: sete %dil # sched: [1:0.50] -; SKX-NEXT: setne %dil # sched: [1:0.50] -; SKX-NEXT: setbe %dil # sched: [2:1.00] -; SKX-NEXT: seta %dil # sched: [2:1.00] -; SKX-NEXT: sets %dil # sched: [1:0.50] -; SKX-NEXT: setns %dil # sched: [1:0.50] -; SKX-NEXT: setp %dil # sched: [1:0.50] -; SKX-NEXT: setnp %dil # sched: [1:0.50] -; SKX-NEXT: setl %dil # sched: [1:0.50] -; SKX-NEXT: setge %dil # sched: [1:0.50] -; SKX-NEXT: setle %dil # sched: [1:0.50] -; SKX-NEXT: setg %dil # sched: [1:0.50] -; SKX-NEXT: seto (%rsi) # sched: [2:1.00] -; SKX-NEXT: setno (%rsi) # sched: [2:1.00] -; SKX-NEXT: setb (%rsi) # sched: [2:1.00] -; SKX-NEXT: setae (%rsi) # sched: [2:1.00] -; SKX-NEXT: sete (%rsi) # sched: [2:1.00] -; SKX-NEXT: setne (%rsi) # sched: [2:1.00] -; SKX-NEXT: setbe (%rsi) # sched: [3:1.00] -; SKX-NEXT: seta (%rsi) # sched: [3:1.00] -; SKX-NEXT: sets (%rsi) # sched: [2:1.00] -; SKX-NEXT: setns (%rsi) # sched: [2:1.00] -; SKX-NEXT: setp (%rsi) # sched: [2:1.00] -; SKX-NEXT: setnp (%rsi) # sched: [2:1.00] -; SKX-NEXT: setl (%rsi) # sched: [2:1.00] -; SKX-NEXT: setge (%rsi) # sched: [2:1.00] -; SKX-NEXT: setle (%rsi) # sched: [2:1.00] -; SKX-NEXT: setg (%rsi) # sched: [2:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_setcc: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: seto %dil # sched: [1:0.50] -; BDVER2-NEXT: setno %dil # sched: [1:0.50] -; BDVER2-NEXT: setb %dil # sched: [1:0.50] -; BDVER2-NEXT: setae %dil # sched: [1:0.50] -; BDVER2-NEXT: sete %dil # sched: [1:0.50] -; BDVER2-NEXT: setne %dil # sched: [1:0.50] -; BDVER2-NEXT: setbe %dil # sched: [1:0.50] -; BDVER2-NEXT: seta %dil # sched: [1:0.50] -; BDVER2-NEXT: sets %dil # sched: [1:0.50] -; BDVER2-NEXT: setns %dil # sched: [1:0.50] -; BDVER2-NEXT: setp %dil # sched: [1:0.50] -; BDVER2-NEXT: setnp %dil # sched: [1:0.50] -; BDVER2-NEXT: setl %dil # sched: [1:0.50] -; BDVER2-NEXT: setge %dil # sched: [1:0.50] -; BDVER2-NEXT: setle %dil # sched: [1:0.50] -; BDVER2-NEXT: setg %dil # sched: [1:0.50] -; BDVER2-NEXT: seto (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: setno (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: setb (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: setae (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: sete (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: setne (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: setbe (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: seta (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: sets (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: setns (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: setp (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: setnp (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: setl (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: setge (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: setle (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: setg (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_setcc: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: seto %dil # sched: [1:0.50] -; BTVER2-NEXT: setno %dil # sched: [1:0.50] -; BTVER2-NEXT: setb %dil # sched: [1:0.50] -; BTVER2-NEXT: setae %dil # sched: [1:0.50] -; BTVER2-NEXT: sete %dil # sched: [1:0.50] -; BTVER2-NEXT: setne %dil # sched: [1:0.50] -; BTVER2-NEXT: setbe %dil # sched: [1:0.50] -; BTVER2-NEXT: seta %dil # sched: [1:0.50] -; BTVER2-NEXT: sets %dil # sched: [1:0.50] -; BTVER2-NEXT: setns %dil # sched: [1:0.50] -; BTVER2-NEXT: setp %dil # sched: [1:0.50] -; BTVER2-NEXT: setnp %dil # sched: [1:0.50] -; BTVER2-NEXT: setl %dil # sched: [1:0.50] -; BTVER2-NEXT: setge %dil # sched: [1:0.50] -; BTVER2-NEXT: setle %dil # sched: [1:0.50] -; BTVER2-NEXT: setg %dil # sched: [1:0.50] -; BTVER2-NEXT: seto (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: setno (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: setb (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: setae (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: sete (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: setne (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: setbe (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: seta (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: sets (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: setns (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: setp (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: setnp (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: setl (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: setge (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: setle (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: setg (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_setcc: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: seto %dil # sched: [1:0.25] -; ZNVER1-NEXT: setno %dil # sched: [1:0.25] -; ZNVER1-NEXT: setb %dil # sched: [1:0.25] -; ZNVER1-NEXT: setae %dil # sched: [1:0.25] -; ZNVER1-NEXT: sete %dil # sched: [1:0.25] -; ZNVER1-NEXT: setne %dil # sched: [1:0.25] -; ZNVER1-NEXT: setbe %dil # sched: [1:0.25] -; ZNVER1-NEXT: seta %dil # sched: [1:0.25] -; ZNVER1-NEXT: sets %dil # sched: [1:0.25] -; ZNVER1-NEXT: setns %dil # sched: [1:0.25] -; ZNVER1-NEXT: setp %dil # sched: [1:0.25] -; ZNVER1-NEXT: setnp %dil # sched: [1:0.25] -; ZNVER1-NEXT: setl %dil # sched: [1:0.25] -; ZNVER1-NEXT: setge %dil # sched: [1:0.25] -; ZNVER1-NEXT: setle %dil # sched: [1:0.25] -; ZNVER1-NEXT: setg %dil # sched: [1:0.25] -; ZNVER1-NEXT: seto (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: setno (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: setb (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: setae (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: sete (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: setne (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: setbe (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: seta (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: sets (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: setns (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: setp (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: setnp (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: setl (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: setge (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: setle (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: setg (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "seto $0 \0A\09 setno $0 \0A\09 setb $0 \0A\09 setnb $0 \0A\09 setz $0 \0A\09 setnz $0 \0A\09 setbe $0 \0A\09 setnbe $0 \0A\09 sets $0 \0A\09 setns $0 \0A\09 setp $0 \0A\09 setnp $0 \0A\09 setl $0 \0A\09 setnl $0 \0A\09 setle $0 \0A\09 setnle $0 \0A\09 seto $1 \0A\09 setno $1 \0A\09 setb $1 \0A\09 setnb $1 \0A\09 setz $1 \0A\09 setnz $1 \0A\09 setbe $1 \0A\09 setnbe $1 \0A\09 sets $1 \0A\09 setns $1 \0A\09 setp $1 \0A\09 setnp $1 \0A\09 setl $1 \0A\09 setnl $1 \0A\09 setle $1 \0A\09 setnle $1", "r,*m"(i8 %a0, i8 *%a1) - ret void -} - -; TODO - test_sgdt - -define void @test_shld_shrd_16(i16 %a0, i16 %a1, i16 *%a2) optsize { -; GENERIC-LABEL: test_shld_shrd_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: shldw %cl, %si, %di # sched: [4:1.50] -; GENERIC-NEXT: shrdw %cl, %si, %di # sched: [4:1.50] -; GENERIC-NEXT: shldw %cl, %si, (%rdx) # sched: [10:1.50] -; GENERIC-NEXT: shrdw %cl, %si, (%rdx) # sched: [10:1.50] -; GENERIC-NEXT: shldw $7, %si, %di # sched: [2:0.67] -; GENERIC-NEXT: shrdw $7, %si, %di # sched: [2:0.67] -; GENERIC-NEXT: shldw $7, %si, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: shrdw $7, %si, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_shld_shrd_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: shldw %cl, %si, %di # sched: [6:3.00] -; ATOM-NEXT: shrdw %cl, %si, %di # sched: [6:3.00] -; ATOM-NEXT: shldw %cl, %si, (%rdx) # sched: [6:3.00] -; ATOM-NEXT: shrdw %cl, %si, (%rdx) # sched: [6:3.00] -; ATOM-NEXT: shldw $7, %si, %di # sched: [6:3.00] -; ATOM-NEXT: shrdw $7, %si, %di # sched: [6:3.00] -; ATOM-NEXT: shldw $7, %si, (%rdx) # sched: [6:3.00] -; ATOM-NEXT: shrdw $7, %si, (%rdx) # sched: [6:3.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_shld_shrd_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: shldw %cl, %si, %di # sched: [1:1.00] -; SLM-NEXT: shrdw %cl, %si, %di # sched: [1:1.00] -; SLM-NEXT: shldw %cl, %si, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrdw %cl, %si, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shldw $7, %si, %di # sched: [1:1.00] -; SLM-NEXT: shrdw $7, %si, %di # sched: [1:1.00] -; SLM-NEXT: shldw $7, %si, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrdw $7, %si, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_shld_shrd_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: shldw %cl, %si, %di # sched: [4:1.50] -; SANDY-NEXT: shrdw %cl, %si, %di # sched: [4:1.50] -; SANDY-NEXT: shldw %cl, %si, (%rdx) # sched: [10:1.50] -; SANDY-NEXT: shrdw %cl, %si, (%rdx) # sched: [10:1.50] -; SANDY-NEXT: shldw $7, %si, %di # sched: [2:0.67] -; SANDY-NEXT: shrdw $7, %si, %di # sched: [2:0.67] -; SANDY-NEXT: shldw $7, %si, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: shrdw $7, %si, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_shld_shrd_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: shldw %cl, %si, %di # sched: [6:1.00] -; HASWELL-NEXT: shrdw %cl, %si, %di # sched: [6:1.00] -; HASWELL-NEXT: shldw %cl, %si, (%rdx) # sched: [12:1.00] -; HASWELL-NEXT: shrdw %cl, %si, (%rdx) # sched: [12:1.00] -; HASWELL-NEXT: shldw $7, %si, %di # sched: [3:1.00] -; HASWELL-NEXT: shrdw $7, %si, %di # sched: [3:1.00] -; HASWELL-NEXT: shldw $7, %si, (%rdx) # sched: [10:1.00] -; HASWELL-NEXT: shrdw $7, %si, (%rdx) # sched: [10:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_shld_shrd_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: shldw %cl, %si, %di # sched: [6:1.00] -; BROADWELL-NEXT: shrdw %cl, %si, %di # sched: [6:1.00] -; BROADWELL-NEXT: shldw %cl, %si, (%rdx) # sched: [11:1.00] -; BROADWELL-NEXT: shrdw %cl, %si, (%rdx) # sched: [11:1.00] -; BROADWELL-NEXT: shldw $7, %si, %di # sched: [3:1.00] -; BROADWELL-NEXT: shrdw $7, %si, %di # sched: [3:1.00] -; BROADWELL-NEXT: shldw $7, %si, (%rdx) # sched: [9:1.00] -; BROADWELL-NEXT: shrdw $7, %si, (%rdx) # sched: [9:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_shld_shrd_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: shldw %cl, %si, %di # sched: [6:1.00] -; SKYLAKE-NEXT: shrdw %cl, %si, %di # sched: [6:1.00] -; SKYLAKE-NEXT: shldw %cl, %si, (%rdx) # sched: [11:1.00] -; SKYLAKE-NEXT: shrdw %cl, %si, (%rdx) # sched: [11:1.00] -; SKYLAKE-NEXT: shldw $7, %si, %di # sched: [3:1.00] -; SKYLAKE-NEXT: shrdw $7, %si, %di # sched: [3:1.00] -; SKYLAKE-NEXT: shldw $7, %si, (%rdx) # sched: [9:1.00] -; SKYLAKE-NEXT: shrdw $7, %si, (%rdx) # sched: [9:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_shld_shrd_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: shldw %cl, %si, %di # sched: [6:1.00] -; SKX-NEXT: shrdw %cl, %si, %di # sched: [6:1.00] -; SKX-NEXT: shldw %cl, %si, (%rdx) # sched: [11:1.00] -; SKX-NEXT: shrdw %cl, %si, (%rdx) # sched: [11:1.00] -; SKX-NEXT: shldw $7, %si, %di # sched: [3:1.00] -; SKX-NEXT: shrdw $7, %si, %di # sched: [3:1.00] -; SKX-NEXT: shldw $7, %si, (%rdx) # sched: [9:1.00] -; SKX-NEXT: shrdw $7, %si, (%rdx) # sched: [9:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_shld_shrd_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: shldw %cl, %si, %di # sched: [4:4.00] -; BDVER2-NEXT: shrdw %cl, %si, %di # sched: [4:4.00] -; BDVER2-NEXT: shldw %cl, %si, (%rdx) # sched: [4:11.00] -; BDVER2-NEXT: shrdw %cl, %si, (%rdx) # sched: [4:11.00] -; BDVER2-NEXT: shldw $7, %si, %di # sched: [4:3.00] -; BDVER2-NEXT: shrdw $7, %si, %di # sched: [3:3.00] -; BDVER2-NEXT: shldw $7, %si, (%rdx) # sched: [4:11.00] -; BDVER2-NEXT: shrdw $7, %si, (%rdx) # sched: [4:11.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_shld_shrd_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: shldw %cl, %si, %di # sched: [4:4.00] -; BTVER2-NEXT: shrdw %cl, %si, %di # sched: [4:4.00] -; BTVER2-NEXT: shldw %cl, %si, (%rdx) # sched: [9:11.00] -; BTVER2-NEXT: shrdw %cl, %si, (%rdx) # sched: [9:11.00] -; BTVER2-NEXT: shldw $7, %si, %di # sched: [3:3.00] -; BTVER2-NEXT: shrdw $7, %si, %di # sched: [3:3.00] -; BTVER2-NEXT: shldw $7, %si, (%rdx) # sched: [9:11.00] -; BTVER2-NEXT: shrdw $7, %si, (%rdx) # sched: [9:11.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_shld_shrd_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: shldw %cl, %si, %di # sched: [100:0.25] -; ZNVER1-NEXT: shrdw %cl, %si, %di # sched: [100:0.25] -; ZNVER1-NEXT: shldw %cl, %si, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: shrdw %cl, %si, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: shldw $7, %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: shrdw $7, %si, %di # sched: [1:0.25] -; ZNVER1-NEXT: shldw $7, %si, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shrdw $7, %si, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "shld $1, $0 \0A\09 shrd $1, $0 \0A\09 shld $1, $2 \0A\09 shrd $1, $2 \0A\09 shld $3, $1, $0 \0A\09 shrd $3, $1, $0 \0A\09 shld $3, $1, $2 \0A\09 shrd $3, $1, $2", "r,r,*m,i"(i16 %a0, i16 %a1, i16 *%a2, i8 7) - ret void -} -define void @test_shld_shrd_32(i32 %a0, i32 %a1, i32 *%a2) optsize { -; GENERIC-LABEL: test_shld_shrd_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: shldl %cl, %esi, %edi # sched: [4:1.50] -; GENERIC-NEXT: shrdl %cl, %esi, %edi # sched: [4:1.50] -; GENERIC-NEXT: shldl %cl, %esi, (%rdx) # sched: [10:1.50] -; GENERIC-NEXT: shrdl %cl, %esi, (%rdx) # sched: [10:1.50] -; GENERIC-NEXT: shldl $7, %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: shrdl $7, %esi, %edi # sched: [2:0.67] -; GENERIC-NEXT: shldl $7, %esi, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: shrdl $7, %esi, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_shld_shrd_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: shldl %cl, %esi, %edi # sched: [2:1.00] -; ATOM-NEXT: shrdl %cl, %esi, %edi # sched: [2:1.00] -; ATOM-NEXT: shldl %cl, %esi, (%rdx) # sched: [4:2.00] -; ATOM-NEXT: shrdl %cl, %esi, (%rdx) # sched: [4:2.00] -; ATOM-NEXT: shldl $7, %esi, %edi # sched: [2:1.00] -; ATOM-NEXT: shrdl $7, %esi, %edi # sched: [2:1.00] -; ATOM-NEXT: shldl $7, %esi, (%rdx) # sched: [4:2.00] -; ATOM-NEXT: shrdl $7, %esi, (%rdx) # sched: [4:2.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_shld_shrd_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: shldl %cl, %esi, %edi # sched: [1:1.00] -; SLM-NEXT: shrdl %cl, %esi, %edi # sched: [1:1.00] -; SLM-NEXT: shldl %cl, %esi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrdl %cl, %esi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shldl $7, %esi, %edi # sched: [1:1.00] -; SLM-NEXT: shrdl $7, %esi, %edi # sched: [1:1.00] -; SLM-NEXT: shldl $7, %esi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrdl $7, %esi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_shld_shrd_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: shldl %cl, %esi, %edi # sched: [4:1.50] -; SANDY-NEXT: shrdl %cl, %esi, %edi # sched: [4:1.50] -; SANDY-NEXT: shldl %cl, %esi, (%rdx) # sched: [10:1.50] -; SANDY-NEXT: shrdl %cl, %esi, (%rdx) # sched: [10:1.50] -; SANDY-NEXT: shldl $7, %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: shrdl $7, %esi, %edi # sched: [2:0.67] -; SANDY-NEXT: shldl $7, %esi, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: shrdl $7, %esi, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_shld_shrd_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: shldl %cl, %esi, %edi # sched: [6:1.00] -; HASWELL-NEXT: shrdl %cl, %esi, %edi # sched: [6:1.00] -; HASWELL-NEXT: shldl %cl, %esi, (%rdx) # sched: [12:1.00] -; HASWELL-NEXT: shrdl %cl, %esi, (%rdx) # sched: [12:1.00] -; HASWELL-NEXT: shldl $7, %esi, %edi # sched: [3:1.00] -; HASWELL-NEXT: shrdl $7, %esi, %edi # sched: [3:1.00] -; HASWELL-NEXT: shldl $7, %esi, (%rdx) # sched: [10:1.00] -; HASWELL-NEXT: shrdl $7, %esi, (%rdx) # sched: [10:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_shld_shrd_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: shldl %cl, %esi, %edi # sched: [6:1.00] -; BROADWELL-NEXT: shrdl %cl, %esi, %edi # sched: [6:1.00] -; BROADWELL-NEXT: shldl %cl, %esi, (%rdx) # sched: [11:1.00] -; BROADWELL-NEXT: shrdl %cl, %esi, (%rdx) # sched: [11:1.00] -; BROADWELL-NEXT: shldl $7, %esi, %edi # sched: [3:1.00] -; BROADWELL-NEXT: shrdl $7, %esi, %edi # sched: [3:1.00] -; BROADWELL-NEXT: shldl $7, %esi, (%rdx) # sched: [9:1.00] -; BROADWELL-NEXT: shrdl $7, %esi, (%rdx) # sched: [9:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_shld_shrd_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: shldl %cl, %esi, %edi # sched: [6:1.00] -; SKYLAKE-NEXT: shrdl %cl, %esi, %edi # sched: [6:1.00] -; SKYLAKE-NEXT: shldl %cl, %esi, (%rdx) # sched: [11:1.00] -; SKYLAKE-NEXT: shrdl %cl, %esi, (%rdx) # sched: [11:1.00] -; SKYLAKE-NEXT: shldl $7, %esi, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: shrdl $7, %esi, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: shldl $7, %esi, (%rdx) # sched: [9:1.00] -; SKYLAKE-NEXT: shrdl $7, %esi, (%rdx) # sched: [9:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_shld_shrd_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: shldl %cl, %esi, %edi # sched: [6:1.00] -; SKX-NEXT: shrdl %cl, %esi, %edi # sched: [6:1.00] -; SKX-NEXT: shldl %cl, %esi, (%rdx) # sched: [11:1.00] -; SKX-NEXT: shrdl %cl, %esi, (%rdx) # sched: [11:1.00] -; SKX-NEXT: shldl $7, %esi, %edi # sched: [3:1.00] -; SKX-NEXT: shrdl $7, %esi, %edi # sched: [3:1.00] -; SKX-NEXT: shldl $7, %esi, (%rdx) # sched: [9:1.00] -; SKX-NEXT: shrdl $7, %esi, (%rdx) # sched: [9:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_shld_shrd_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: shldl %cl, %esi, %edi # sched: [4:4.00] -; BDVER2-NEXT: shrdl %cl, %esi, %edi # sched: [4:4.00] -; BDVER2-NEXT: shldl %cl, %esi, (%rdx) # sched: [4:11.00] -; BDVER2-NEXT: shrdl %cl, %esi, (%rdx) # sched: [4:11.00] -; BDVER2-NEXT: shldl $7, %esi, %edi # sched: [3:3.00] -; BDVER2-NEXT: shrdl $7, %esi, %edi # sched: [4:3.00] -; BDVER2-NEXT: shldl $7, %esi, (%rdx) # sched: [4:11.00] -; BDVER2-NEXT: shrdl $7, %esi, (%rdx) # sched: [4:11.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_shld_shrd_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: shldl %cl, %esi, %edi # sched: [4:4.00] -; BTVER2-NEXT: shrdl %cl, %esi, %edi # sched: [4:4.00] -; BTVER2-NEXT: shldl %cl, %esi, (%rdx) # sched: [9:11.00] -; BTVER2-NEXT: shrdl %cl, %esi, (%rdx) # sched: [9:11.00] -; BTVER2-NEXT: shldl $7, %esi, %edi # sched: [3:3.00] -; BTVER2-NEXT: shrdl $7, %esi, %edi # sched: [3:3.00] -; BTVER2-NEXT: shldl $7, %esi, (%rdx) # sched: [9:11.00] -; BTVER2-NEXT: shrdl $7, %esi, (%rdx) # sched: [9:11.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_shld_shrd_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: shldl %cl, %esi, %edi # sched: [100:0.25] -; ZNVER1-NEXT: shrdl %cl, %esi, %edi # sched: [100:0.25] -; ZNVER1-NEXT: shldl %cl, %esi, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: shrdl %cl, %esi, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: shldl $7, %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: shrdl $7, %esi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: shldl $7, %esi, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shrdl $7, %esi, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "shld $1, $0 \0A\09 shrd $1, $0 \0A\09 shld $1, $2 \0A\09 shrd $1, $2 \0A\09 shld $3, $1, $0 \0A\09 shrd $3, $1, $0 \0A\09 shld $3, $1, $2 \0A\09 shrd $3, $1, $2", "r,r,*m,i"(i32 %a0, i32 %a1, i32 *%a2, i8 7) - ret void -} -define void @test_shld_shrd_64(i64 %a0, i64 %a1, i64 *%a2) optsize { -; GENERIC-LABEL: test_shld_shrd_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: shldq %cl, %rsi, %rdi # sched: [4:1.50] -; GENERIC-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:1.50] -; GENERIC-NEXT: shldq %cl, %rsi, (%rdx) # sched: [10:1.50] -; GENERIC-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [10:1.50] -; GENERIC-NEXT: shldq $7, %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: shrdq $7, %rsi, %rdi # sched: [2:0.67] -; GENERIC-NEXT: shldq $7, %rsi, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: shrdq $7, %rsi, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_shld_shrd_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: shldq %cl, %rsi, %rdi # sched: [8:4.00] -; ATOM-NEXT: shrdq %cl, %rsi, %rdi # sched: [8:4.00] -; ATOM-NEXT: shldq %cl, %rsi, (%rdx) # sched: [9:4.50] -; ATOM-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [9:4.50] -; ATOM-NEXT: shldq $7, %rsi, %rdi # sched: [9:4.50] -; ATOM-NEXT: shrdq $7, %rsi, %rdi # sched: [9:4.50] -; ATOM-NEXT: shldq $7, %rsi, (%rdx) # sched: [9:4.50] -; ATOM-NEXT: shrdq $7, %rsi, (%rdx) # sched: [9:4.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_shld_shrd_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: shldq %cl, %rsi, %rdi # sched: [1:1.00] -; SLM-NEXT: shrdq %cl, %rsi, %rdi # sched: [1:1.00] -; SLM-NEXT: shldq %cl, %rsi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shldq $7, %rsi, %rdi # sched: [1:1.00] -; SLM-NEXT: shrdq $7, %rsi, %rdi # sched: [1:1.00] -; SLM-NEXT: shldq $7, %rsi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: shrdq $7, %rsi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_shld_shrd_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: shldq %cl, %rsi, %rdi # sched: [4:1.50] -; SANDY-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:1.50] -; SANDY-NEXT: shldq %cl, %rsi, (%rdx) # sched: [10:1.50] -; SANDY-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [10:1.50] -; SANDY-NEXT: shldq $7, %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: shrdq $7, %rsi, %rdi # sched: [2:0.67] -; SANDY-NEXT: shldq $7, %rsi, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: shrdq $7, %rsi, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_shld_shrd_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: shldq %cl, %rsi, %rdi # sched: [6:1.00] -; HASWELL-NEXT: shrdq %cl, %rsi, %rdi # sched: [6:1.00] -; HASWELL-NEXT: shldq %cl, %rsi, (%rdx) # sched: [12:1.00] -; HASWELL-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [12:1.00] -; HASWELL-NEXT: shldq $7, %rsi, %rdi # sched: [3:1.00] -; HASWELL-NEXT: shrdq $7, %rsi, %rdi # sched: [3:1.00] -; HASWELL-NEXT: shldq $7, %rsi, (%rdx) # sched: [10:1.00] -; HASWELL-NEXT: shrdq $7, %rsi, (%rdx) # sched: [10:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_shld_shrd_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: shldq %cl, %rsi, %rdi # sched: [6:1.00] -; BROADWELL-NEXT: shrdq %cl, %rsi, %rdi # sched: [6:1.00] -; BROADWELL-NEXT: shldq %cl, %rsi, (%rdx) # sched: [11:1.00] -; BROADWELL-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [11:1.00] -; BROADWELL-NEXT: shldq $7, %rsi, %rdi # sched: [3:1.00] -; BROADWELL-NEXT: shrdq $7, %rsi, %rdi # sched: [3:1.00] -; BROADWELL-NEXT: shldq $7, %rsi, (%rdx) # sched: [9:1.00] -; BROADWELL-NEXT: shrdq $7, %rsi, (%rdx) # sched: [9:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_shld_shrd_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: shldq %cl, %rsi, %rdi # sched: [6:1.00] -; SKYLAKE-NEXT: shrdq %cl, %rsi, %rdi # sched: [6:1.00] -; SKYLAKE-NEXT: shldq %cl, %rsi, (%rdx) # sched: [11:1.00] -; SKYLAKE-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [11:1.00] -; SKYLAKE-NEXT: shldq $7, %rsi, %rdi # sched: [3:1.00] -; SKYLAKE-NEXT: shrdq $7, %rsi, %rdi # sched: [3:1.00] -; SKYLAKE-NEXT: shldq $7, %rsi, (%rdx) # sched: [9:1.00] -; SKYLAKE-NEXT: shrdq $7, %rsi, (%rdx) # sched: [9:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_shld_shrd_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: shldq %cl, %rsi, %rdi # sched: [6:1.00] -; SKX-NEXT: shrdq %cl, %rsi, %rdi # sched: [6:1.00] -; SKX-NEXT: shldq %cl, %rsi, (%rdx) # sched: [11:1.00] -; SKX-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [11:1.00] -; SKX-NEXT: shldq $7, %rsi, %rdi # sched: [3:1.00] -; SKX-NEXT: shrdq $7, %rsi, %rdi # sched: [3:1.00] -; SKX-NEXT: shldq $7, %rsi, (%rdx) # sched: [9:1.00] -; SKX-NEXT: shrdq $7, %rsi, (%rdx) # sched: [9:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_shld_shrd_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: shldq %cl, %rsi, %rdi # sched: [4:4.00] -; BDVER2-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:4.00] -; BDVER2-NEXT: shldq %cl, %rsi, (%rdx) # sched: [4:11.00] -; BDVER2-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [4:11.00] -; BDVER2-NEXT: shldq $7, %rsi, %rdi # sched: [4:3.00] -; BDVER2-NEXT: shrdq $7, %rsi, %rdi # sched: [4:3.00] -; BDVER2-NEXT: shldq $7, %rsi, (%rdx) # sched: [4:11.00] -; BDVER2-NEXT: shrdq $7, %rsi, (%rdx) # sched: [4:11.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_shld_shrd_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: shldq %cl, %rsi, %rdi # sched: [4:4.00] -; BTVER2-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:4.00] -; BTVER2-NEXT: shldq %cl, %rsi, (%rdx) # sched: [9:11.00] -; BTVER2-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [9:11.00] -; BTVER2-NEXT: shldq $7, %rsi, %rdi # sched: [3:3.00] -; BTVER2-NEXT: shrdq $7, %rsi, %rdi # sched: [3:3.00] -; BTVER2-NEXT: shldq $7, %rsi, (%rdx) # sched: [9:11.00] -; BTVER2-NEXT: shrdq $7, %rsi, (%rdx) # sched: [9:11.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_shld_shrd_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: shldq %cl, %rsi, %rdi # sched: [100:0.25] -; ZNVER1-NEXT: shrdq %cl, %rsi, %rdi # sched: [100:0.25] -; ZNVER1-NEXT: shldq %cl, %rsi, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: shldq $7, %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: shrdq $7, %rsi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: shldq $7, %rsi, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: shrdq $7, %rsi, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "shld $1, $0 \0A\09 shrd $1, $0 \0A\09 shld $1, $2 \0A\09 shrd $1, $2 \0A\09 shld $3, $1, $0 \0A\09 shrd $3, $1, $0 \0A\09 shld $3, $1, $2 \0A\09 shrd $3, $1, $2", "r,r,*m,i"(i64 %a0, i64 %a1, i64 *%a2, i8 7) - ret void -} - -; TODO - test_sidt -; TODO - test_sldt -; TODO - test_smsw - -define void @test_stc_std() optsize { -; GENERIC-LABEL: test_stc_std: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: stc # sched: [1:0.33] -; GENERIC-NEXT: std # sched: [1:0.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_stc_std: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: stc # sched: [1:0.50] -; ATOM-NEXT: std # sched: [21:10.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_stc_std: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: stc # sched: [1:0.50] -; SLM-NEXT: std # sched: [1:0.50] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_stc_std: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: stc # sched: [1:0.33] -; SANDY-NEXT: std # sched: [1:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_stc_std: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: stc # sched: [1:0.25] -; HASWELL-NEXT: std # sched: [6:1.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_stc_std: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: stc # sched: [1:0.25] -; BROADWELL-NEXT: std # sched: [6:1.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_stc_std: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: stc # sched: [1:0.25] -; SKYLAKE-NEXT: std # sched: [6:1.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_stc_std: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: stc # sched: [1:0.25] -; SKX-NEXT: std # sched: [6:1.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_stc_std: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: stc # sched: [1:0.50] -; BDVER2-NEXT: std # sched: [1:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_stc_std: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: stc # sched: [1:0.50] -; BTVER2-NEXT: std # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_stc_std: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: stc # sched: [1:0.25] -; ZNVER1-NEXT: std # sched: [1:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "stc \0A\09 std", ""() - ret void -} - -; TODO - test_sti -; TODO - test_stgi - -define void @test_stos() optsize { -; GENERIC-LABEL: test_stos: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: stosb %al, %es:(%rdi) # sched: [5:1.00] -; GENERIC-NEXT: stosw %ax, %es:(%rdi) # sched: [5:1.00] -; GENERIC-NEXT: stosl %eax, %es:(%rdi) # sched: [5:1.00] -; GENERIC-NEXT: stosq %rax, %es:(%rdi) # sched: [5:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_stos: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: stosb %al, %es:(%rdi) # sched: [1:0.50] -; ATOM-NEXT: stosw %ax, %es:(%rdi) # sched: [1:0.50] -; ATOM-NEXT: stosl %eax, %es:(%rdi) # sched: [1:0.50] -; ATOM-NEXT: stosq %rax, %es:(%rdi) # sched: [1:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_stos: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: stosb %al, %es:(%rdi) # sched: [100:1.00] -; SLM-NEXT: stosw %ax, %es:(%rdi) # sched: [100:1.00] -; SLM-NEXT: stosl %eax, %es:(%rdi) # sched: [100:1.00] -; SLM-NEXT: stosq %rax, %es:(%rdi) # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_stos: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: stosb %al, %es:(%rdi) # sched: [5:1.00] -; SANDY-NEXT: stosw %ax, %es:(%rdi) # sched: [5:1.00] -; SANDY-NEXT: stosl %eax, %es:(%rdi) # sched: [5:1.00] -; SANDY-NEXT: stosq %rax, %es:(%rdi) # sched: [5:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_stos: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: stosb %al, %es:(%rdi) # sched: [2:1.00] -; HASWELL-NEXT: stosw %ax, %es:(%rdi) # sched: [2:1.00] -; HASWELL-NEXT: stosl %eax, %es:(%rdi) # sched: [2:1.00] -; HASWELL-NEXT: stosq %rax, %es:(%rdi) # sched: [2:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_stos: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: stosb %al, %es:(%rdi) # sched: [2:1.00] -; BROADWELL-NEXT: stosw %ax, %es:(%rdi) # sched: [2:1.00] -; BROADWELL-NEXT: stosl %eax, %es:(%rdi) # sched: [2:1.00] -; BROADWELL-NEXT: stosq %rax, %es:(%rdi) # sched: [2:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_stos: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: stosb %al, %es:(%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: stosw %ax, %es:(%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: stosl %eax, %es:(%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: stosq %rax, %es:(%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_stos: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: stosb %al, %es:(%rdi) # sched: [2:1.00] -; SKX-NEXT: stosw %ax, %es:(%rdi) # sched: [2:1.00] -; SKX-NEXT: stosl %eax, %es:(%rdi) # sched: [2:1.00] -; SKX-NEXT: stosq %rax, %es:(%rdi) # sched: [2:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_stos: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: stosb %al, %es:(%rdi) # sched: [100:0.50] -; BDVER2-NEXT: stosw %ax, %es:(%rdi) # sched: [100:0.50] -; BDVER2-NEXT: stosl %eax, %es:(%rdi) # sched: [100:0.50] -; BDVER2-NEXT: stosq %rax, %es:(%rdi) # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_stos: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: stosb %al, %es:(%rdi) # sched: [100:0.50] -; BTVER2-NEXT: stosw %ax, %es:(%rdi) # sched: [100:0.50] -; BTVER2-NEXT: stosl %eax, %es:(%rdi) # sched: [100:0.50] -; BTVER2-NEXT: stosq %rax, %es:(%rdi) # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_stos: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: stosb %al, %es:(%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: stosw %ax, %es:(%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: stosl %eax, %es:(%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: stosq %rax, %es:(%rdi) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "stosb \0A\09 stosw \0A\09 stosl \0A\09 stosq", ""() - ret void -} - -; TODO - test_str - -define void @test_sub_8(i8 %a0, i8* %a1, i8 %a2) optsize { -; GENERIC-LABEL: test_sub_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: subb $7, %al # sched: [1:0.33] -; GENERIC-NEXT: subb $7, %dil # sched: [1:0.33] -; GENERIC-NEXT: subb $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: subb %dl, %dil # sched: [1:0.33] -; GENERIC-NEXT: subb %dil, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: subb (%rsi), %dil # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sub_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: subb $7, %al # sched: [1:0.50] -; ATOM-NEXT: subb $7, %dil # sched: [1:0.50] -; ATOM-NEXT: subb $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: subb %dl, %dil # sched: [1:0.50] -; ATOM-NEXT: subb %dil, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: subb (%rsi), %dil # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sub_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: subb $7, %al # sched: [1:0.50] -; SLM-NEXT: subb $7, %dil # sched: [1:0.50] -; SLM-NEXT: subb $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: subb %dl, %dil # sched: [1:0.50] -; SLM-NEXT: subb %dil, (%rsi) # sched: [5:2.00] -; SLM-NEXT: subb (%rsi), %dil # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_sub_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: subb $7, %al # sched: [1:0.33] -; SANDY-NEXT: subb $7, %dil # sched: [1:0.33] -; SANDY-NEXT: subb $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: subb %dl, %dil # sched: [1:0.33] -; SANDY-NEXT: subb %dil, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: subb (%rsi), %dil # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sub_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: subb $7, %al # sched: [1:0.25] -; HASWELL-NEXT: subb $7, %dil # sched: [1:0.25] -; HASWELL-NEXT: subb $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: subb %dl, %dil # sched: [1:0.25] -; HASWELL-NEXT: subb %dil, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: subb (%rsi), %dil # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sub_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: subb $7, %al # sched: [1:0.25] -; BROADWELL-NEXT: subb $7, %dil # sched: [1:0.25] -; BROADWELL-NEXT: subb $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: subb %dl, %dil # sched: [1:0.25] -; BROADWELL-NEXT: subb %dil, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: subb (%rsi), %dil # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sub_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: subb $7, %al # sched: [1:0.25] -; SKYLAKE-NEXT: subb $7, %dil # sched: [1:0.25] -; SKYLAKE-NEXT: subb $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: subb %dl, %dil # sched: [1:0.25] -; SKYLAKE-NEXT: subb %dil, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: subb (%rsi), %dil # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sub_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: subb $7, %al # sched: [1:0.25] -; SKX-NEXT: subb $7, %dil # sched: [1:0.25] -; SKX-NEXT: subb $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: subb %dl, %dil # sched: [1:0.25] -; SKX-NEXT: subb %dil, (%rsi) # sched: [7:1.00] -; SKX-NEXT: subb (%rsi), %dil # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_sub_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: subb $7, %al # sched: [1:0.50] -; BDVER2-NEXT: subb $7, %dil # sched: [1:0.50] -; BDVER2-NEXT: subb $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: subb %dl, %dil # sched: [1:0.50] -; BDVER2-NEXT: subb %dil, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: subb (%rsi), %dil # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_sub_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: subb $7, %al # sched: [1:0.50] -; BTVER2-NEXT: subb $7, %dil # sched: [1:0.50] -; BTVER2-NEXT: subb $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: subb %dl, %dil # sched: [1:0.50] -; BTVER2-NEXT: subb %dil, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: subb (%rsi), %dil # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_sub_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: subb $7, %al # sched: [1:0.25] -; ZNVER1-NEXT: subb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: subb $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: subb %dl, %dil # sched: [1:0.25] -; ZNVER1-NEXT: subb %dil, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: subb (%rsi), %dil # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "subb $3, %AL \0A\09 subb $3, $0 \0A\09 subb $3, $2 \0A\09 subb $1, $0 \0A\09 subb $0, $2 \0A\09 subb $2, $0", "r,r,*m,i"(i8 %a0, i8 %a2, i8* %a1, i8 7) nounwind - ret void -} -define void @test_sub_16(i16 %a0, i16* %a1, i16 %a2) optsize { -; GENERIC-LABEL: test_sub_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: subw $511, %ax # imm = 0x1FF -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: subw $511, %di # imm = 0x1FF -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: subw $511, (%rsi) # imm = 0x1FF -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: subw $7, %di # sched: [1:0.33] -; GENERIC-NEXT: subw $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: subw %dx, %di # sched: [1:0.33] -; GENERIC-NEXT: subw %di, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: subw (%rsi), %di # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sub_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: subw $511, %ax # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: subw $511, %di # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: subw $511, (%rsi) # imm = 0x1FF -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: subw $7, %di # sched: [1:0.50] -; ATOM-NEXT: subw $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: subw %dx, %di # sched: [1:0.50] -; ATOM-NEXT: subw %di, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: subw (%rsi), %di # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sub_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: subw $511, %ax # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: subw $511, %di # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: subw $511, (%rsi) # imm = 0x1FF -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: subw $7, %di # sched: [1:0.50] -; SLM-NEXT: subw $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: subw %dx, %di # sched: [1:0.50] -; SLM-NEXT: subw %di, (%rsi) # sched: [5:2.00] -; SLM-NEXT: subw (%rsi), %di # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_sub_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: subw $511, %ax # imm = 0x1FF -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: subw $511, %di # imm = 0x1FF -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: subw $511, (%rsi) # imm = 0x1FF -; SANDY-NEXT: # sched: [7:1.00] -; SANDY-NEXT: subw $7, %di # sched: [1:0.33] -; SANDY-NEXT: subw $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: subw %dx, %di # sched: [1:0.33] -; SANDY-NEXT: subw %di, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: subw (%rsi), %di # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sub_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: subw $511, %ax # imm = 0x1FF -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: subw $511, %di # imm = 0x1FF -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: subw $511, (%rsi) # imm = 0x1FF -; HASWELL-NEXT: # sched: [7:1.00] -; HASWELL-NEXT: subw $7, %di # sched: [1:0.25] -; HASWELL-NEXT: subw $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: subw %dx, %di # sched: [1:0.25] -; HASWELL-NEXT: subw %di, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: subw (%rsi), %di # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sub_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: subw $511, %ax # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: subw $511, %di # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: subw $511, (%rsi) # imm = 0x1FF -; BROADWELL-NEXT: # sched: [7:1.00] -; BROADWELL-NEXT: subw $7, %di # sched: [1:0.25] -; BROADWELL-NEXT: subw $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: subw %dx, %di # sched: [1:0.25] -; BROADWELL-NEXT: subw %di, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: subw (%rsi), %di # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sub_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: subw $511, %ax # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: subw $511, %di # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: subw $511, (%rsi) # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [7:1.00] -; SKYLAKE-NEXT: subw $7, %di # sched: [1:0.25] -; SKYLAKE-NEXT: subw $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: subw %dx, %di # sched: [1:0.25] -; SKYLAKE-NEXT: subw %di, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: subw (%rsi), %di # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sub_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: subw $511, %ax # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: subw $511, %di # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: subw $511, (%rsi) # imm = 0x1FF -; SKX-NEXT: # sched: [7:1.00] -; SKX-NEXT: subw $7, %di # sched: [1:0.25] -; SKX-NEXT: subw $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: subw %dx, %di # sched: [1:0.25] -; SKX-NEXT: subw %di, (%rsi) # sched: [7:1.00] -; SKX-NEXT: subw (%rsi), %di # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_sub_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: subw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: subw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: subw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: subw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: subw $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: subw %dx, %di # sched: [1:0.50] -; BDVER2-NEXT: subw %di, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: subw (%rsi), %di # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_sub_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: subw $511, %ax # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: subw $511, %di # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: subw $511, (%rsi) # imm = 0x1FF -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: subw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: subw $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: subw %dx, %di # sched: [1:0.50] -; BTVER2-NEXT: subw %di, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: subw (%rsi), %di # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_sub_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: subw $511, %ax # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: subw $511, %di # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: subw $511, (%rsi) # imm = 0x1FF -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: subw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: subw $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: subw %dx, %di # sched: [1:0.25] -; ZNVER1-NEXT: subw %di, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: subw (%rsi), %di # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "subw $3, %AX \0A\09 subw $3, $0 \0A\09 subw $3, $2 \0A\09 subw $4, $0 \0A\09 subw $4, $2 \0A\09 subw $1, $0 \0A\09 subw $0, $2 \0A\09 subw $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind - ret void -} -define void @test_sub_32(i32 %a0, i32* %a1, i32 %a2) optsize { -; GENERIC-LABEL: test_sub_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: subl $665536, %eax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: subl $665536, %edi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: subl $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: subl $7, %edi # sched: [1:0.33] -; GENERIC-NEXT: subl $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: subl %edx, %edi # sched: [1:0.33] -; GENERIC-NEXT: subl %edi, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: subl (%rsi), %edi # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sub_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: subl $665536, %eax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: subl $665536, %edi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: subl $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: subl $7, %edi # sched: [1:0.50] -; ATOM-NEXT: subl $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: subl %edx, %edi # sched: [1:0.50] -; ATOM-NEXT: subl %edi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: subl (%rsi), %edi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sub_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: subl $665536, %eax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: subl $665536, %edi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: subl $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: subl $7, %edi # sched: [1:0.50] -; SLM-NEXT: subl $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: subl %edx, %edi # sched: [1:0.50] -; SLM-NEXT: subl %edi, (%rsi) # sched: [5:2.00] -; SLM-NEXT: subl (%rsi), %edi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_sub_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: subl $665536, %eax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: subl $665536, %edi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: subl $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [7:1.00] -; SANDY-NEXT: subl $7, %edi # sched: [1:0.33] -; SANDY-NEXT: subl $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: subl %edx, %edi # sched: [1:0.33] -; SANDY-NEXT: subl %edi, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: subl (%rsi), %edi # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sub_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: subl $665536, %eax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: subl $665536, %edi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: subl $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [7:1.00] -; HASWELL-NEXT: subl $7, %edi # sched: [1:0.25] -; HASWELL-NEXT: subl $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: subl %edx, %edi # sched: [1:0.25] -; HASWELL-NEXT: subl %edi, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: subl (%rsi), %edi # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sub_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: subl $665536, %eax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: subl $665536, %edi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: subl $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [7:1.00] -; BROADWELL-NEXT: subl $7, %edi # sched: [1:0.25] -; BROADWELL-NEXT: subl $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: subl %edx, %edi # sched: [1:0.25] -; BROADWELL-NEXT: subl %edi, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: subl (%rsi), %edi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sub_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: subl $665536, %eax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: subl $665536, %edi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: subl $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [7:1.00] -; SKYLAKE-NEXT: subl $7, %edi # sched: [1:0.25] -; SKYLAKE-NEXT: subl $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: subl %edx, %edi # sched: [1:0.25] -; SKYLAKE-NEXT: subl %edi, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: subl (%rsi), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sub_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: subl $665536, %eax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: subl $665536, %edi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: subl $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [7:1.00] -; SKX-NEXT: subl $7, %edi # sched: [1:0.25] -; SKX-NEXT: subl $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: subl %edx, %edi # sched: [1:0.25] -; SKX-NEXT: subl %edi, (%rsi) # sched: [7:1.00] -; SKX-NEXT: subl (%rsi), %edi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_sub_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: subl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: subl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: subl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: subl $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: subl $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: subl %edx, %edi # sched: [1:0.50] -; BDVER2-NEXT: subl %edi, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: subl (%rsi), %edi # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_sub_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: subl $665536, %eax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: subl $665536, %edi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: subl $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: subl $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: subl $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: subl %edx, %edi # sched: [1:0.50] -; BTVER2-NEXT: subl %edi, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: subl (%rsi), %edi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_sub_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: subl $665536, %eax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: subl $665536, %edi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: subl $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: subl $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: subl $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: subl %edx, %edi # sched: [1:0.25] -; ZNVER1-NEXT: subl %edi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: subl (%rsi), %edi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "subl $3, %EAX \0A\09 subl $3, $0 \0A\09 subl $3, $2 \0A\09 subl $4, $0 \0A\09 subl $4, $2 \0A\09 subl $1, $0 \0A\09 subl $0, $2 \0A\09 subl $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind - ret void -} -define void @test_sub_64(i64 %a0, i64* %a1, i64 %a2) optsize { -; GENERIC-LABEL: test_sub_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: subq $665536, %rax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: subq $665536, %rdi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: subq $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: subq $7, %rdi # sched: [1:0.33] -; GENERIC-NEXT: subq $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: subq %rdx, %rdi # sched: [1:0.33] -; GENERIC-NEXT: subq %rdi, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: subq (%rsi), %rdi # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sub_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: subq $665536, %rax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: subq $665536, %rdi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: subq $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: subq $7, %rdi # sched: [1:0.50] -; ATOM-NEXT: subq $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: subq %rdx, %rdi # sched: [1:0.50] -; ATOM-NEXT: subq %rdi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: subq (%rsi), %rdi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sub_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: subq $665536, %rax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: subq $665536, %rdi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: subq $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: subq $7, %rdi # sched: [1:0.50] -; SLM-NEXT: subq $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: subq %rdx, %rdi # sched: [1:0.50] -; SLM-NEXT: subq %rdi, (%rsi) # sched: [5:2.00] -; SLM-NEXT: subq (%rsi), %rdi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_sub_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: subq $665536, %rax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: subq $665536, %rdi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: subq $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [7:1.00] -; SANDY-NEXT: subq $7, %rdi # sched: [1:0.33] -; SANDY-NEXT: subq $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: subq %rdx, %rdi # sched: [1:0.33] -; SANDY-NEXT: subq %rdi, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: subq (%rsi), %rdi # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_sub_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: subq $665536, %rax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: subq $665536, %rdi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: subq $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [7:1.00] -; HASWELL-NEXT: subq $7, %rdi # sched: [1:0.25] -; HASWELL-NEXT: subq $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: subq %rdx, %rdi # sched: [1:0.25] -; HASWELL-NEXT: subq %rdi, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: subq (%rsi), %rdi # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sub_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: subq $665536, %rax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: subq $665536, %rdi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: subq $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [7:1.00] -; BROADWELL-NEXT: subq $7, %rdi # sched: [1:0.25] -; BROADWELL-NEXT: subq $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: subq %rdx, %rdi # sched: [1:0.25] -; BROADWELL-NEXT: subq %rdi, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: subq (%rsi), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sub_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: subq $665536, %rax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: subq $665536, %rdi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: subq $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [7:1.00] -; SKYLAKE-NEXT: subq $7, %rdi # sched: [1:0.25] -; SKYLAKE-NEXT: subq $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: subq %rdx, %rdi # sched: [1:0.25] -; SKYLAKE-NEXT: subq %rdi, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: subq (%rsi), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sub_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: subq $665536, %rax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: subq $665536, %rdi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: subq $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [7:1.00] -; SKX-NEXT: subq $7, %rdi # sched: [1:0.25] -; SKX-NEXT: subq $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: subq %rdx, %rdi # sched: [1:0.25] -; SKX-NEXT: subq %rdi, (%rsi) # sched: [7:1.00] -; SKX-NEXT: subq (%rsi), %rdi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_sub_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: subq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: subq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: subq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: subq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: subq $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: subq %rdx, %rdi # sched: [1:0.50] -; BDVER2-NEXT: subq %rdi, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: subq (%rsi), %rdi # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_sub_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: subq $665536, %rax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: subq $665536, %rdi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: subq $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: subq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: subq $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: subq %rdx, %rdi # sched: [1:0.50] -; BTVER2-NEXT: subq %rdi, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: subq (%rsi), %rdi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_sub_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: subq $665536, %rax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: subq $665536, %rdi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: subq $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: subq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: subq $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: subq %rdx, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: subq %rdi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: subq (%rsi), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "subq $3, %RAX \0A\09 subq $3, $0 \0A\09 subq $3, $2 \0A\09 subq $4, $0 \0A\09 subq $4, $2 \0A\09 subq $1, $0 \0A\09 subq $0, $2 \0A\09 subq $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind - ret void -} - -; TODO - test_swapgs -; TODO - test_syscall -; TODO - test_sysenter -; TODO - test_sysexit -; TODO - test_sysret - -define void @test_test_8(i8 %a0, i8* %a1) optsize { -; GENERIC-LABEL: test_test_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: testb $7, %al # sched: [1:0.33] -; GENERIC-NEXT: testb $7, %dil # sched: [1:0.33] -; GENERIC-NEXT: testb $7, (%rsi) # sched: [6:0.50] -; GENERIC-NEXT: testb %dil, %dil # sched: [1:0.33] -; GENERIC-NEXT: testb %dil, (%rsi) # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_test_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: testb $7, %al # sched: [1:0.50] -; ATOM-NEXT: testb $7, %dil # sched: [1:0.50] -; ATOM-NEXT: testb $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: testb %dil, %dil # sched: [1:0.50] -; ATOM-NEXT: testb %dil, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_test_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: testb $7, %al # sched: [1:0.50] -; SLM-NEXT: testb $7, %dil # sched: [1:0.50] -; SLM-NEXT: testb $7, (%rsi) # sched: [4:1.00] -; SLM-NEXT: testb %dil, %dil # sched: [1:0.50] -; SLM-NEXT: testb %dil, (%rsi) # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_test_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: testb $7, %al # sched: [1:0.33] -; SANDY-NEXT: testb $7, %dil # sched: [1:0.33] -; SANDY-NEXT: testb $7, (%rsi) # sched: [6:0.50] -; SANDY-NEXT: testb %dil, %dil # sched: [1:0.33] -; SANDY-NEXT: testb %dil, (%rsi) # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_test_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: testb $7, %al # sched: [1:0.25] -; HASWELL-NEXT: testb $7, %dil # sched: [1:0.25] -; HASWELL-NEXT: testb $7, (%rsi) # sched: [6:0.50] -; HASWELL-NEXT: testb %dil, %dil # sched: [1:0.25] -; HASWELL-NEXT: testb %dil, (%rsi) # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_test_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: testb $7, %al # sched: [1:0.25] -; BROADWELL-NEXT: testb $7, %dil # sched: [1:0.25] -; BROADWELL-NEXT: testb $7, (%rsi) # sched: [6:0.50] -; BROADWELL-NEXT: testb %dil, %dil # sched: [1:0.25] -; BROADWELL-NEXT: testb %dil, (%rsi) # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_test_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: testb $7, %al # sched: [1:0.25] -; SKYLAKE-NEXT: testb $7, %dil # sched: [1:0.25] -; SKYLAKE-NEXT: testb $7, (%rsi) # sched: [6:0.50] -; SKYLAKE-NEXT: testb %dil, %dil # sched: [1:0.25] -; SKYLAKE-NEXT: testb %dil, (%rsi) # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_test_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: testb $7, %al # sched: [1:0.25] -; SKX-NEXT: testb $7, %dil # sched: [1:0.25] -; SKX-NEXT: testb $7, (%rsi) # sched: [6:0.50] -; SKX-NEXT: testb %dil, %dil # sched: [1:0.25] -; SKX-NEXT: testb %dil, (%rsi) # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_test_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: testb $7, %al # sched: [1:0.50] -; BDVER2-NEXT: testb $7, %dil # sched: [1:0.50] -; BDVER2-NEXT: testb $7, (%rsi) # sched: [5:0.50] -; BDVER2-NEXT: testb %dil, %dil # sched: [1:0.50] -; BDVER2-NEXT: testb %dil, (%rsi) # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_test_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: testb $7, %al # sched: [1:0.50] -; BTVER2-NEXT: testb $7, %dil # sched: [1:0.50] -; BTVER2-NEXT: testb $7, (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: testb %dil, %dil # sched: [1:0.50] -; BTVER2-NEXT: testb %dil, (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_test_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: testb $7, %al # sched: [1:0.25] -; ZNVER1-NEXT: testb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: testb $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: testb %dil, %dil # sched: [1:0.25] -; ZNVER1-NEXT: testb %dil, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "testb $2, %AL \0A\09 testb $2, $0 \0A\09 testb $2, $1 \0A\09 testb $0, $0 \0A\09 testb $0, $1", "r,*m,i"(i8 %a0, i8* %a1, i8 7) nounwind - ret void -} -define void @test_test_16(i16 %a0, i16* %a1) optsize { -; GENERIC-LABEL: test_test_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: testw $511, %ax # imm = 0x1FF -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: testw $511, %di # imm = 0x1FF -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: testw $511, (%rsi) # imm = 0x1FF -; GENERIC-NEXT: # sched: [6:0.50] -; GENERIC-NEXT: testw %di, %di # sched: [1:0.33] -; GENERIC-NEXT: testw %di, (%rsi) # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_test_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: testw $511, %ax # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: testw $511, %di # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: testw $511, (%rsi) # imm = 0x1FF -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: testw %di, %di # sched: [1:0.50] -; ATOM-NEXT: testw %di, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_test_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: testw $511, %ax # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: testw $511, %di # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: testw $511, (%rsi) # imm = 0x1FF -; SLM-NEXT: # sched: [4:1.00] -; SLM-NEXT: testw %di, %di # sched: [1:0.50] -; SLM-NEXT: testw %di, (%rsi) # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_test_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: testw $511, %ax # imm = 0x1FF -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: testw $511, %di # imm = 0x1FF -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: testw $511, (%rsi) # imm = 0x1FF -; SANDY-NEXT: # sched: [6:0.50] -; SANDY-NEXT: testw %di, %di # sched: [1:0.33] -; SANDY-NEXT: testw %di, (%rsi) # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_test_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: testw $511, %ax # imm = 0x1FF -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: testw $511, %di # imm = 0x1FF -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: testw $511, (%rsi) # imm = 0x1FF -; HASWELL-NEXT: # sched: [6:0.50] -; HASWELL-NEXT: testw %di, %di # sched: [1:0.25] -; HASWELL-NEXT: testw %di, (%rsi) # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_test_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: testw $511, %ax # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: testw $511, %di # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: testw $511, (%rsi) # imm = 0x1FF -; BROADWELL-NEXT: # sched: [6:0.50] -; BROADWELL-NEXT: testw %di, %di # sched: [1:0.25] -; BROADWELL-NEXT: testw %di, (%rsi) # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_test_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: testw $511, %ax # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: testw $511, %di # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: testw $511, (%rsi) # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [6:0.50] -; SKYLAKE-NEXT: testw %di, %di # sched: [1:0.25] -; SKYLAKE-NEXT: testw %di, (%rsi) # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_test_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: testw $511, %ax # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: testw $511, %di # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: testw $511, (%rsi) # imm = 0x1FF -; SKX-NEXT: # sched: [6:0.50] -; SKX-NEXT: testw %di, %di # sched: [1:0.25] -; SKX-NEXT: testw %di, (%rsi) # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_test_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: testw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: testw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: testw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [5:0.50] -; BDVER2-NEXT: testw %di, %di # sched: [1:0.50] -; BDVER2-NEXT: testw %di, (%rsi) # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_test_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: testw $511, %ax # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: testw $511, %di # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: testw $511, (%rsi) # imm = 0x1FF -; BTVER2-NEXT: # sched: [4:1.00] -; BTVER2-NEXT: testw %di, %di # sched: [1:0.50] -; BTVER2-NEXT: testw %di, (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_test_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: testw $511, %ax # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: testw $511, %di # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: testw $511, (%rsi) # imm = 0x1FF -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: testw %di, %di # sched: [1:0.25] -; ZNVER1-NEXT: testw %di, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "testw $2, %AX \0A\09 testw $2, $0 \0A\09 testw $2, $1 \0A\09 testw $0, $0 \0A\09 testw $0, $1", "r,*m,i"(i16 %a0, i16* %a1, i16 511) nounwind - ret void -} -define void @test_test_32(i32 %a0, i32* %a1) optsize { -; GENERIC-LABEL: test_test_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: testl $665536, %eax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: testl $665536, %edi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: testl $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [6:0.50] -; GENERIC-NEXT: testl %edi, %edi # sched: [1:0.33] -; GENERIC-NEXT: testl %edi, (%rsi) # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_test_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: testl $665536, %eax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: testl $665536, %edi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: testl $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: testl %edi, %edi # sched: [1:0.50] -; ATOM-NEXT: testl %edi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_test_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: testl $665536, %eax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: testl $665536, %edi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: testl $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [4:1.00] -; SLM-NEXT: testl %edi, %edi # sched: [1:0.50] -; SLM-NEXT: testl %edi, (%rsi) # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_test_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: testl $665536, %eax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: testl $665536, %edi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: testl $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [6:0.50] -; SANDY-NEXT: testl %edi, %edi # sched: [1:0.33] -; SANDY-NEXT: testl %edi, (%rsi) # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_test_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: testl $665536, %eax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: testl $665536, %edi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: testl $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [6:0.50] -; HASWELL-NEXT: testl %edi, %edi # sched: [1:0.25] -; HASWELL-NEXT: testl %edi, (%rsi) # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_test_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: testl $665536, %eax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: testl $665536, %edi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: testl $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [6:0.50] -; BROADWELL-NEXT: testl %edi, %edi # sched: [1:0.25] -; BROADWELL-NEXT: testl %edi, (%rsi) # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_test_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: testl $665536, %eax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: testl $665536, %edi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: testl $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [6:0.50] -; SKYLAKE-NEXT: testl %edi, %edi # sched: [1:0.25] -; SKYLAKE-NEXT: testl %edi, (%rsi) # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_test_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: testl $665536, %eax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: testl $665536, %edi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: testl $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [6:0.50] -; SKX-NEXT: testl %edi, %edi # sched: [1:0.25] -; SKX-NEXT: testl %edi, (%rsi) # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_test_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: testl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: testl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: testl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [5:0.50] -; BDVER2-NEXT: testl %edi, %edi # sched: [1:0.50] -; BDVER2-NEXT: testl %edi, (%rsi) # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_test_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: testl $665536, %eax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: testl $665536, %edi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: testl $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [4:1.00] -; BTVER2-NEXT: testl %edi, %edi # sched: [1:0.50] -; BTVER2-NEXT: testl %edi, (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_test_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: testl $665536, %eax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: testl $665536, %edi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: testl $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: testl %edi, %edi # sched: [1:0.25] -; ZNVER1-NEXT: testl %edi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "testl $2, %EAX \0A\09 testl $2, $0 \0A\09 testl $2, $1 \0A\09 testl $0, $0 \0A\09 testl $0, $1", "r,*m,i"(i32 %a0, i32* %a1, i32 665536) nounwind - ret void -} -define void @test_test_64(i64 %a0, i64* %a1) optsize { -; GENERIC-LABEL: test_test_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: testq $665536, %rax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: testq $665536, %rdi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: testq $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [6:0.50] -; GENERIC-NEXT: testq %rdi, %rdi # sched: [1:0.33] -; GENERIC-NEXT: testq %rdi, (%rsi) # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_test_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: testq $665536, %rax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: testq $665536, %rdi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: testq $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: testq %rdi, %rdi # sched: [1:0.50] -; ATOM-NEXT: testq %rdi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_test_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: testq $665536, %rax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: testq $665536, %rdi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: testq $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [4:1.00] -; SLM-NEXT: testq %rdi, %rdi # sched: [1:0.50] -; SLM-NEXT: testq %rdi, (%rsi) # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_test_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: testq $665536, %rax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: testq $665536, %rdi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: testq $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [6:0.50] -; SANDY-NEXT: testq %rdi, %rdi # sched: [1:0.33] -; SANDY-NEXT: testq %rdi, (%rsi) # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_test_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: testq $665536, %rax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: testq $665536, %rdi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: testq $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [6:0.50] -; HASWELL-NEXT: testq %rdi, %rdi # sched: [1:0.25] -; HASWELL-NEXT: testq %rdi, (%rsi) # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_test_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: testq $665536, %rax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: testq $665536, %rdi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: testq $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [6:0.50] -; BROADWELL-NEXT: testq %rdi, %rdi # sched: [1:0.25] -; BROADWELL-NEXT: testq %rdi, (%rsi) # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_test_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: testq $665536, %rax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: testq $665536, %rdi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: testq $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [6:0.50] -; SKYLAKE-NEXT: testq %rdi, %rdi # sched: [1:0.25] -; SKYLAKE-NEXT: testq %rdi, (%rsi) # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_test_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: testq $665536, %rax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: testq $665536, %rdi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: testq $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [6:0.50] -; SKX-NEXT: testq %rdi, %rdi # sched: [1:0.25] -; SKX-NEXT: testq %rdi, (%rsi) # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_test_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: testq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: testq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: testq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [5:0.50] -; BDVER2-NEXT: testq %rdi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: testq %rdi, (%rsi) # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_test_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: testq $665536, %rax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: testq $665536, %rdi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: testq $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [4:1.00] -; BTVER2-NEXT: testq %rdi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: testq %rdi, (%rsi) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_test_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: testq $665536, %rax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: testq $665536, %rdi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: testq $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: testq %rdi, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: testq %rdi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "testq $2, %RAX \0A\09 testq $2, $0 \0A\09 testq $2, $1 \0A\09 testq $0, $0 \0A\09 testq $0, $1", "r,*m,i"(i64 %a0, i64* %a1, i32 665536) nounwind - ret void -} - -; TODO: ud0, ud1 -define void @test_ud2() optsize { -; GENERIC-LABEL: test_ud2: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: ud2 # sched: [100:0.33] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_ud2: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: ud2 # sched: [100:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_ud2: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: ud2 # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_ud2: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: ud2 # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_ud2: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: ud2 # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ud2: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: ud2 # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_ud2: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: ud2 # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_ud2: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: ud2 # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_ud2: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: ud2 # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_ud2: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: ud2 # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ud2: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: ud2 # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "ud2", ""() - ret void -} - -; TODO - test_verr -; TODO - test_verw -; TODO - test_vmload -; TODO - test_vmmcall -; TODO - test_vmrun -; TODO - test_vmsave -; TODO - test_wbinvd - -define void @test_xadd_8(i8 %a0, i8 %a1, i8 *%a2) optsize { -; GENERIC-LABEL: test_xadd_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: xaddb %dil, %sil # sched: [2:1.00] -; GENERIC-NEXT: xaddb %dil, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xadd_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: xaddb %dil, %sil # sched: [2:1.00] -; ATOM-NEXT: xaddb %dil, (%rdx) # sched: [3:1.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xadd_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: xaddb %dil, %sil # sched: [1:0.50] -; SLM-NEXT: xaddb %dil, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_xadd_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: xaddb %dil, %sil # sched: [2:1.00] -; SANDY-NEXT: xaddb %dil, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_xadd_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: xaddb %dil, %sil # sched: [2:0.75] -; HASWELL-NEXT: xaddb %dil, (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xadd_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: xaddb %dil, %sil # sched: [2:0.75] -; BROADWELL-NEXT: xaddb %dil, (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xadd_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: xaddb %dil, %sil # sched: [2:0.75] -; SKYLAKE-NEXT: xaddb %dil, (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xadd_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: xaddb %dil, %sil # sched: [2:0.75] -; SKX-NEXT: xaddb %dil, (%rdx) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_xadd_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: xaddb %dil, %sil # sched: [2:1.00] -; BDVER2-NEXT: xaddb %dil, (%rdx) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_xadd_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: xaddb %dil, %sil # sched: [1:0.50] -; BTVER2-NEXT: xaddb %dil, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xadd_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: xaddb %dil, %sil # sched: [1:0.25] -; ZNVER1-NEXT: xaddb %dil, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "xaddb $0, $1 \0A\09 xaddb $0, $2", "r,r,*m"(i8 %a0, i8 %a1, i8 *%a2) nounwind - ret void -} -define void @test_xadd_16(i16 %a0, i16 %a1, i16 *%a2) optsize { -; GENERIC-LABEL: test_xadd_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: xaddw %di, %si # sched: [2:1.00] -; GENERIC-NEXT: xaddw %di, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xadd_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: xaddw %di, %si # sched: [2:1.00] -; ATOM-NEXT: xaddw %di, (%rdx) # sched: [3:1.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xadd_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: xaddw %di, %si # sched: [1:0.50] -; SLM-NEXT: xaddw %di, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_xadd_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: xaddw %di, %si # sched: [2:1.00] -; SANDY-NEXT: xaddw %di, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_xadd_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: xaddw %di, %si # sched: [2:0.75] -; HASWELL-NEXT: xaddw %di, (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xadd_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: xaddw %di, %si # sched: [2:0.75] -; BROADWELL-NEXT: xaddw %di, (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xadd_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: xaddw %di, %si # sched: [2:0.75] -; SKYLAKE-NEXT: xaddw %di, (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xadd_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: xaddw %di, %si # sched: [2:0.75] -; SKX-NEXT: xaddw %di, (%rdx) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_xadd_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: xaddw %di, %si # sched: [2:1.00] -; BDVER2-NEXT: xaddw %di, (%rdx) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_xadd_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: xaddw %di, %si # sched: [1:0.50] -; BTVER2-NEXT: xaddw %di, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xadd_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: xaddw %di, %si # sched: [1:0.25] -; ZNVER1-NEXT: xaddw %di, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "xaddw $0, $1 \0A\09 xaddw $0, $2", "r,r,*m"(i16 %a0, i16 %a1, i16 *%a2) nounwind - ret void -} -define void @test_xadd_32(i32 %a0, i32 %a1, i32 *%a2) optsize { -; GENERIC-LABEL: test_xadd_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: xaddl %edi, %esi # sched: [2:1.00] -; GENERIC-NEXT: xaddl %edi, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xadd_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: xaddl %edi, %esi # sched: [2:1.00] -; ATOM-NEXT: xaddl %edi, (%rdx) # sched: [3:1.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xadd_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: xaddl %edi, %esi # sched: [1:0.50] -; SLM-NEXT: xaddl %edi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_xadd_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: xaddl %edi, %esi # sched: [2:1.00] -; SANDY-NEXT: xaddl %edi, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_xadd_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: xaddl %edi, %esi # sched: [2:0.75] -; HASWELL-NEXT: xaddl %edi, (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xadd_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: xaddl %edi, %esi # sched: [2:0.75] -; BROADWELL-NEXT: xaddl %edi, (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xadd_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: xaddl %edi, %esi # sched: [2:0.75] -; SKYLAKE-NEXT: xaddl %edi, (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xadd_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: xaddl %edi, %esi # sched: [2:0.75] -; SKX-NEXT: xaddl %edi, (%rdx) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_xadd_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: xaddl %edi, %esi # sched: [2:1.00] -; BDVER2-NEXT: xaddl %edi, (%rdx) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_xadd_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: xaddl %edi, %esi # sched: [1:0.50] -; BTVER2-NEXT: xaddl %edi, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xadd_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: xaddl %edi, %esi # sched: [1:0.25] -; ZNVER1-NEXT: xaddl %edi, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "xaddl $0, $1 \0A\09 xaddl $0, $2", "r,r,*m"(i32 %a0, i32 %a1, i32 *%a2) nounwind - ret void -} -define void @test_xadd_64(i64 %a0, i64 %a1, i64 *%a2) optsize { -; GENERIC-LABEL: test_xadd_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: xaddq %rdi, %rsi # sched: [2:1.00] -; GENERIC-NEXT: xaddq %rdi, (%rdx) # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xadd_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: xaddq %rdi, %rsi # sched: [2:1.00] -; ATOM-NEXT: xaddq %rdi, (%rdx) # sched: [3:1.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xadd_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: xaddq %rdi, %rsi # sched: [1:0.50] -; SLM-NEXT: xaddq %rdi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_xadd_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: xaddq %rdi, %rsi # sched: [2:1.00] -; SANDY-NEXT: xaddq %rdi, (%rdx) # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_xadd_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: xaddq %rdi, %rsi # sched: [2:0.75] -; HASWELL-NEXT: xaddq %rdi, (%rdx) # sched: [8:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xadd_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: xaddq %rdi, %rsi # sched: [2:0.75] -; BROADWELL-NEXT: xaddq %rdi, (%rdx) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xadd_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: xaddq %rdi, %rsi # sched: [2:0.75] -; SKYLAKE-NEXT: xaddq %rdi, (%rdx) # sched: [7:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xadd_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: xaddq %rdi, %rsi # sched: [2:0.75] -; SKX-NEXT: xaddq %rdi, (%rdx) # sched: [7:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_xadd_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: xaddq %rdi, %rsi # sched: [2:1.00] -; BDVER2-NEXT: xaddq %rdi, (%rdx) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_xadd_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: xaddq %rdi, %rsi # sched: [1:0.50] -; BTVER2-NEXT: xaddq %rdi, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xadd_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: xaddq %rdi, %rsi # sched: [1:0.25] -; ZNVER1-NEXT: xaddq %rdi, (%rdx) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "xaddq $0, $1 \0A\09 xaddq $0, $2", "r,r,*m"(i64 %a0, i64 %a1, i64 *%a2) nounwind - ret void -} - -define void @test_xchg_8(i8 %a0, i8 %a1, i8 *%a2) optsize { -; GENERIC-LABEL: test_xchg_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: xchgb %sil, %dil # sched: [2:1.00] -; GENERIC-NEXT: xchgb %dil, (%rdx) # sched: [6:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xchg_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: xchgb %sil, %dil # sched: [2:1.00] -; ATOM-NEXT: xchgb %dil, (%rdx) # sched: [3:1.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xchg_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: xchgb %sil, %dil # sched: [1:0.50] -; SLM-NEXT: xchgb %dil, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_xchg_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: xchgb %sil, %dil # sched: [2:1.00] -; SANDY-NEXT: xchgb %dil, (%rdx) # sched: [6:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_xchg_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: xchgb %sil, %dil # sched: [2:0.75] -; HASWELL-NEXT: xchgb %dil, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xchg_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: xchgb %sil, %dil # sched: [2:0.75] -; BROADWELL-NEXT: xchgb %dil, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xchg_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: xchgb %sil, %dil # sched: [2:0.75] -; SKYLAKE-NEXT: xchgb %dil, (%rdx) # sched: [10:1.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xchg_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: xchgb %sil, %dil # sched: [2:0.75] -; SKX-NEXT: xchgb %dil, (%rdx) # sched: [10:1.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_xchg_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: xchgb %sil, %dil # sched: [1:1.00] -; BDVER2-NEXT: xchgb %dil, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_xchg_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: xchgb %sil, %dil # sched: [1:0.50] -; BTVER2-NEXT: xchgb %dil, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xchg_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: xchgb %sil, %dil # sched: [1:0.50] -; ZNVER1-NEXT: xchgb %dil, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "xchg $1, $0 \0A\09 xchg $2, $0", "r,r,*m"(i8 %a0, i8 %a1, i8 *%a2) nounwind - ret void -} -define void @test_xchg_16(i16 %a0, i16 %a1, i16 *%a2) optsize { -; GENERIC-LABEL: test_xchg_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: xchgw %di, %ax # sched: [2:1.00] -; GENERIC-NEXT: xchgw %si, %di # sched: [2:1.00] -; GENERIC-NEXT: xchgw %di, (%rdx) # sched: [6:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xchg_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: xchgw %di, %ax # sched: [2:1.00] -; ATOM-NEXT: xchgw %si, %di # sched: [2:1.00] -; ATOM-NEXT: xchgw %di, (%rdx) # sched: [3:1.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xchg_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: xchgw %di, %ax # sched: [1:0.50] -; SLM-NEXT: xchgw %si, %di # sched: [1:0.50] -; SLM-NEXT: xchgw %di, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_xchg_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: xchgw %di, %ax # sched: [2:1.00] -; SANDY-NEXT: xchgw %si, %di # sched: [2:1.00] -; SANDY-NEXT: xchgw %di, (%rdx) # sched: [6:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_xchg_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: xchgw %di, %ax # sched: [2:0.75] -; HASWELL-NEXT: xchgw %si, %di # sched: [2:0.75] -; HASWELL-NEXT: xchgw %di, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xchg_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: xchgw %di, %ax # sched: [2:0.75] -; BROADWELL-NEXT: xchgw %si, %di # sched: [2:0.75] -; BROADWELL-NEXT: xchgw %di, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xchg_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: xchgw %di, %ax # sched: [2:0.75] -; SKYLAKE-NEXT: xchgw %si, %di # sched: [2:0.75] -; SKYLAKE-NEXT: xchgw %di, (%rdx) # sched: [10:1.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xchg_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: xchgw %di, %ax # sched: [2:0.75] -; SKX-NEXT: xchgw %si, %di # sched: [2:0.75] -; SKX-NEXT: xchgw %di, (%rdx) # sched: [10:1.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_xchg_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: xchgw %di, %ax # sched: [1:1.00] -; BDVER2-NEXT: xchgw %si, %di # sched: [2:1.00] -; BDVER2-NEXT: xchgw %di, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_xchg_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: xchgw %di, %ax # sched: [1:0.50] -; BTVER2-NEXT: xchgw %si, %di # sched: [1:0.50] -; BTVER2-NEXT: xchgw %di, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xchg_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: xchgw %di, %ax # sched: [1:0.50] -; ZNVER1-NEXT: xchgw %si, %di # sched: [1:0.50] -; ZNVER1-NEXT: xchgw %di, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "xchg %AX, $0 \0A\09 xchg $1, $0 \0A\09 xchg $2, $0", "r,r,*m"(i16 %a0, i16 %a1, i16 *%a2) nounwind - ret void -} -define void @test_xchg_32(i32 %a0, i32 %a1, i32 *%a2) optsize { -; GENERIC-LABEL: test_xchg_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: xchgl %edi, %eax # sched: [2:1.00] -; GENERIC-NEXT: xchgl %esi, %edi # sched: [2:1.00] -; GENERIC-NEXT: xchgl %edi, (%rdx) # sched: [6:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xchg_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: xchgl %edi, %eax # sched: [2:1.00] -; ATOM-NEXT: xchgl %esi, %edi # sched: [2:1.00] -; ATOM-NEXT: xchgl %edi, (%rdx) # sched: [3:1.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xchg_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: xchgl %edi, %eax # sched: [1:0.50] -; SLM-NEXT: xchgl %esi, %edi # sched: [1:0.50] -; SLM-NEXT: xchgl %edi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_xchg_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: xchgl %edi, %eax # sched: [2:1.00] -; SANDY-NEXT: xchgl %esi, %edi # sched: [2:1.00] -; SANDY-NEXT: xchgl %edi, (%rdx) # sched: [6:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_xchg_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: xchgl %edi, %eax # sched: [2:0.75] -; HASWELL-NEXT: xchgl %esi, %edi # sched: [2:0.75] -; HASWELL-NEXT: xchgl %edi, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xchg_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: xchgl %edi, %eax # sched: [2:0.75] -; BROADWELL-NEXT: xchgl %esi, %edi # sched: [2:0.75] -; BROADWELL-NEXT: xchgl %edi, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xchg_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: xchgl %edi, %eax # sched: [2:0.75] -; SKYLAKE-NEXT: xchgl %esi, %edi # sched: [2:0.75] -; SKYLAKE-NEXT: xchgl %edi, (%rdx) # sched: [10:1.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xchg_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: xchgl %edi, %eax # sched: [2:0.75] -; SKX-NEXT: xchgl %esi, %edi # sched: [2:0.75] -; SKX-NEXT: xchgl %edi, (%rdx) # sched: [10:1.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_xchg_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: xchgl %edi, %eax # sched: [1:1.00] -; BDVER2-NEXT: xchgl %esi, %edi # sched: [1:1.00] -; BDVER2-NEXT: xchgl %edi, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_xchg_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: xchgl %edi, %eax # sched: [1:0.50] -; BTVER2-NEXT: xchgl %esi, %edi # sched: [1:0.50] -; BTVER2-NEXT: xchgl %edi, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xchg_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: xchgl %edi, %eax # sched: [1:0.50] -; ZNVER1-NEXT: xchgl %esi, %edi # sched: [1:0.50] -; ZNVER1-NEXT: xchgl %edi, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "xchg %EAX, $0 \0A\09 xchg $1, $0 \0A\09 xchg $2, $0", "r,r,*m"(i32 %a0, i32 %a1, i32 *%a2) nounwind - ret void -} -define void @test_xchg_64(i64 %a0, i64 %a1, i64 *%a2) optsize { -; GENERIC-LABEL: test_xchg_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: xchgq %rdi, %rax # sched: [2:1.00] -; GENERIC-NEXT: xchgq %rsi, %rdi # sched: [2:1.00] -; GENERIC-NEXT: xchgq %rdi, (%rdx) # sched: [6:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xchg_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: xchgq %rdi, %rax # sched: [2:1.00] -; ATOM-NEXT: xchgq %rsi, %rdi # sched: [2:1.00] -; ATOM-NEXT: xchgq %rdi, (%rdx) # sched: [3:1.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xchg_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: xchgq %rdi, %rax # sched: [1:0.50] -; SLM-NEXT: xchgq %rsi, %rdi # sched: [1:0.50] -; SLM-NEXT: xchgq %rdi, (%rdx) # sched: [4:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_xchg_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: xchgq %rdi, %rax # sched: [2:1.00] -; SANDY-NEXT: xchgq %rsi, %rdi # sched: [2:1.00] -; SANDY-NEXT: xchgq %rdi, (%rdx) # sched: [6:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_xchg_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: xchgq %rdi, %rax # sched: [2:0.75] -; HASWELL-NEXT: xchgq %rsi, %rdi # sched: [2:0.75] -; HASWELL-NEXT: xchgq %rdi, (%rdx) # sched: [9:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xchg_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: xchgq %rdi, %rax # sched: [2:0.75] -; BROADWELL-NEXT: xchgq %rsi, %rdi # sched: [2:0.75] -; BROADWELL-NEXT: xchgq %rdi, (%rdx) # sched: [8:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xchg_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: xchgq %rdi, %rax # sched: [2:0.75] -; SKYLAKE-NEXT: xchgq %rsi, %rdi # sched: [2:0.75] -; SKYLAKE-NEXT: xchgq %rdi, (%rdx) # sched: [10:1.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xchg_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: xchgq %rdi, %rax # sched: [2:0.75] -; SKX-NEXT: xchgq %rsi, %rdi # sched: [2:0.75] -; SKX-NEXT: xchgq %rdi, (%rdx) # sched: [10:1.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_xchg_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: xchgq %rdi, %rax # sched: [1:1.00] -; BDVER2-NEXT: xchgq %rsi, %rdi # sched: [1:1.00] -; BDVER2-NEXT: xchgq %rdi, (%rdx) # sched: [5:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_xchg_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: xchgq %rdi, %rax # sched: [1:0.50] -; BTVER2-NEXT: xchgq %rsi, %rdi # sched: [1:0.50] -; BTVER2-NEXT: xchgq %rdi, (%rdx) # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xchg_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: xchgq %rdi, %rax # sched: [1:0.50] -; ZNVER1-NEXT: xchgq %rsi, %rdi # sched: [1:0.50] -; ZNVER1-NEXT: xchgq %rdi, (%rdx) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "xchg %RAX, $0 \0A\09 xchg $1, $0 \0A\09 xchg $2, $0", "r,r,*m"(i64 %a0, i64 %a1, i64 *%a2) nounwind - ret void -} - -define void @test_xlat() optsize { -; GENERIC-LABEL: test_xlat: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: xlatb # sched: [5:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xlat: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: xlatb # sched: [6:3.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xlat: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: xlatb # sched: [3:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_xlat: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: xlatb # sched: [5:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_xlat: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: xlatb # sched: [7:0.75] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xlat: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: xlatb # sched: [5:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xlat: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: xlatb # sched: [5:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xlat: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: xlatb # sched: [5:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_xlat: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: xlatb # sched: [6:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_xlat: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: xlatb # sched: [3:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xlat: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: xlatb # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "xlat", ""() nounwind - ret void -} - -define void @test_xor_8(i8 %a0, i8* %a1, i8 %a2) optsize { -; GENERIC-LABEL: test_xor_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: xorb $7, %al # sched: [1:0.33] -; GENERIC-NEXT: xorb $7, %dil # sched: [1:0.33] -; GENERIC-NEXT: xorb $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: xorb %dl, %dil # sched: [1:0.33] -; GENERIC-NEXT: xorb %dil, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: xorb (%rsi), %dil # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xor_8: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: xorb $7, %al # sched: [1:0.50] -; ATOM-NEXT: xorb $7, %dil # sched: [1:0.50] -; ATOM-NEXT: xorb $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: xorb %dl, %dil # sched: [1:0.50] -; ATOM-NEXT: xorb %dil, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: xorb (%rsi), %dil # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xor_8: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: xorb $7, %al # sched: [1:0.50] -; SLM-NEXT: xorb $7, %dil # sched: [1:0.50] -; SLM-NEXT: xorb $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: xorb %dl, %dil # sched: [1:0.50] -; SLM-NEXT: xorb %dil, (%rsi) # sched: [5:2.00] -; SLM-NEXT: xorb (%rsi), %dil # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_xor_8: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: xorb $7, %al # sched: [1:0.33] -; SANDY-NEXT: xorb $7, %dil # sched: [1:0.33] -; SANDY-NEXT: xorb $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: xorb %dl, %dil # sched: [1:0.33] -; SANDY-NEXT: xorb %dil, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: xorb (%rsi), %dil # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_xor_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: xorb $7, %al # sched: [1:0.25] -; HASWELL-NEXT: xorb $7, %dil # sched: [1:0.25] -; HASWELL-NEXT: xorb $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: xorb %dl, %dil # sched: [1:0.25] -; HASWELL-NEXT: xorb %dil, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: xorb (%rsi), %dil # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xor_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: xorb $7, %al # sched: [1:0.25] -; BROADWELL-NEXT: xorb $7, %dil # sched: [1:0.25] -; BROADWELL-NEXT: xorb $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: xorb %dl, %dil # sched: [1:0.25] -; BROADWELL-NEXT: xorb %dil, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: xorb (%rsi), %dil # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xor_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: xorb $7, %al # sched: [1:0.25] -; SKYLAKE-NEXT: xorb $7, %dil # sched: [1:0.25] -; SKYLAKE-NEXT: xorb $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: xorb %dl, %dil # sched: [1:0.25] -; SKYLAKE-NEXT: xorb %dil, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: xorb (%rsi), %dil # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xor_8: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: xorb $7, %al # sched: [1:0.25] -; SKX-NEXT: xorb $7, %dil # sched: [1:0.25] -; SKX-NEXT: xorb $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: xorb %dl, %dil # sched: [1:0.25] -; SKX-NEXT: xorb %dil, (%rsi) # sched: [7:1.00] -; SKX-NEXT: xorb (%rsi), %dil # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_xor_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: xorb $7, %al # sched: [1:0.50] -; BDVER2-NEXT: xorb $7, %dil # sched: [1:0.50] -; BDVER2-NEXT: xorb $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: xorb %dl, %dil # sched: [1:0.50] -; BDVER2-NEXT: xorb %dil, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: xorb (%rsi), %dil # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_xor_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: xorb $7, %al # sched: [1:0.50] -; BTVER2-NEXT: xorb $7, %dil # sched: [1:0.50] -; BTVER2-NEXT: xorb $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: xorb %dl, %dil # sched: [1:0.50] -; BTVER2-NEXT: xorb %dil, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: xorb (%rsi), %dil # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xor_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: xorb $7, %al # sched: [1:0.25] -; ZNVER1-NEXT: xorb $7, %dil # sched: [1:0.25] -; ZNVER1-NEXT: xorb $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: xorb %dl, %dil # sched: [1:0.25] -; ZNVER1-NEXT: xorb %dil, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: xorb (%rsi), %dil # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "xorb $3, %AL \0A\09 xorb $3, $0 \0A\09 xorb $3, $2 \0A\09 xorb $1, $0 \0A\09 xorb $0, $2 \0A\09 xorb $2, $0", "r,r,*m,i"(i8 %a0, i8 %a2, i8* %a1, i8 7) nounwind - ret void -} -define void @test_xor_16(i16 %a0, i16* %a1, i16 %a2) optsize { -; GENERIC-LABEL: test_xor_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: xorw $511, %ax # imm = 0x1FF -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: xorw $511, %di # imm = 0x1FF -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: xorw $511, (%rsi) # imm = 0x1FF -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: xorw $7, %di # sched: [1:0.33] -; GENERIC-NEXT: xorw $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: xorw %dx, %di # sched: [1:0.33] -; GENERIC-NEXT: xorw %di, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: xorw (%rsi), %di # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xor_16: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: xorw $511, %ax # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: xorw $511, %di # imm = 0x1FF -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: xorw $511, (%rsi) # imm = 0x1FF -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: xorw $7, %di # sched: [1:0.50] -; ATOM-NEXT: xorw $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: xorw %dx, %di # sched: [1:0.50] -; ATOM-NEXT: xorw %di, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: xorw (%rsi), %di # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xor_16: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: xorw $511, %ax # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: xorw $511, %di # imm = 0x1FF -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: xorw $511, (%rsi) # imm = 0x1FF -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: xorw $7, %di # sched: [1:0.50] -; SLM-NEXT: xorw $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: xorw %dx, %di # sched: [1:0.50] -; SLM-NEXT: xorw %di, (%rsi) # sched: [5:2.00] -; SLM-NEXT: xorw (%rsi), %di # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_xor_16: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: xorw $511, %ax # imm = 0x1FF -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: xorw $511, %di # imm = 0x1FF -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: xorw $511, (%rsi) # imm = 0x1FF -; SANDY-NEXT: # sched: [7:1.00] -; SANDY-NEXT: xorw $7, %di # sched: [1:0.33] -; SANDY-NEXT: xorw $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: xorw %dx, %di # sched: [1:0.33] -; SANDY-NEXT: xorw %di, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: xorw (%rsi), %di # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_xor_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: xorw $511, %ax # imm = 0x1FF -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: xorw $511, %di # imm = 0x1FF -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: xorw $511, (%rsi) # imm = 0x1FF -; HASWELL-NEXT: # sched: [7:1.00] -; HASWELL-NEXT: xorw $7, %di # sched: [1:0.25] -; HASWELL-NEXT: xorw $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: xorw %dx, %di # sched: [1:0.25] -; HASWELL-NEXT: xorw %di, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: xorw (%rsi), %di # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xor_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: xorw $511, %ax # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: xorw $511, %di # imm = 0x1FF -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: xorw $511, (%rsi) # imm = 0x1FF -; BROADWELL-NEXT: # sched: [7:1.00] -; BROADWELL-NEXT: xorw $7, %di # sched: [1:0.25] -; BROADWELL-NEXT: xorw $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: xorw %dx, %di # sched: [1:0.25] -; BROADWELL-NEXT: xorw %di, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: xorw (%rsi), %di # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xor_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: xorw $511, %ax # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: xorw $511, %di # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: xorw $511, (%rsi) # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [7:1.00] -; SKYLAKE-NEXT: xorw $7, %di # sched: [1:0.25] -; SKYLAKE-NEXT: xorw $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: xorw %dx, %di # sched: [1:0.25] -; SKYLAKE-NEXT: xorw %di, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: xorw (%rsi), %di # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xor_16: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: xorw $511, %ax # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: xorw $511, %di # imm = 0x1FF -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: xorw $511, (%rsi) # imm = 0x1FF -; SKX-NEXT: # sched: [7:1.00] -; SKX-NEXT: xorw $7, %di # sched: [1:0.25] -; SKX-NEXT: xorw $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: xorw %dx, %di # sched: [1:0.25] -; SKX-NEXT: xorw %di, (%rsi) # sched: [7:1.00] -; SKX-NEXT: xorw (%rsi), %di # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_xor_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: xorw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: xorw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: xorw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: xorw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: xorw $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: xorw %dx, %di # sched: [1:0.50] -; BDVER2-NEXT: xorw %di, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: xorw (%rsi), %di # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_xor_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: xorw $511, %ax # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: xorw $511, %di # imm = 0x1FF -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: xorw $511, (%rsi) # imm = 0x1FF -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: xorw $7, %di # sched: [1:0.50] -; BTVER2-NEXT: xorw $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: xorw %dx, %di # sched: [1:0.50] -; BTVER2-NEXT: xorw %di, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: xorw (%rsi), %di # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xor_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: xorw $511, %ax # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: xorw $511, %di # imm = 0x1FF -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: xorw $511, (%rsi) # imm = 0x1FF -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: xorw $7, %di # sched: [1:0.25] -; ZNVER1-NEXT: xorw $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: xorw %dx, %di # sched: [1:0.25] -; ZNVER1-NEXT: xorw %di, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: xorw (%rsi), %di # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "xorw $3, %AX \0A\09 xorw $3, $0 \0A\09 xorw $3, $2 \0A\09 xorw $4, $0 \0A\09 xorw $4, $2 \0A\09 xorw $1, $0 \0A\09 xorw $0, $2 \0A\09 xorw $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind - ret void -} -define void @test_xor_32(i32 %a0, i32* %a1, i32 %a2) optsize { -; GENERIC-LABEL: test_xor_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: xorl $665536, %eax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: xorl $665536, %edi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: xorl $7, %edi # sched: [1:0.33] -; GENERIC-NEXT: xorl $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: xorl %edx, %edi # sched: [1:0.33] -; GENERIC-NEXT: xorl %edi, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: xorl (%rsi), %edi # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xor_32: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: xorl $665536, %eax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: xorl $665536, %edi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: xorl $7, %edi # sched: [1:0.50] -; ATOM-NEXT: xorl $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: xorl %edx, %edi # sched: [1:0.50] -; ATOM-NEXT: xorl %edi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: xorl (%rsi), %edi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xor_32: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: xorl $665536, %eax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: xorl $665536, %edi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: xorl $7, %edi # sched: [1:0.50] -; SLM-NEXT: xorl $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: xorl %edx, %edi # sched: [1:0.50] -; SLM-NEXT: xorl %edi, (%rsi) # sched: [5:2.00] -; SLM-NEXT: xorl (%rsi), %edi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_xor_32: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: xorl $665536, %eax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: xorl $665536, %edi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [7:1.00] -; SANDY-NEXT: xorl $7, %edi # sched: [1:0.33] -; SANDY-NEXT: xorl $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: xorl %edx, %edi # sched: [1:0.33] -; SANDY-NEXT: xorl %edi, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: xorl (%rsi), %edi # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_xor_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: xorl $665536, %eax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: xorl $665536, %edi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [7:1.00] -; HASWELL-NEXT: xorl $7, %edi # sched: [1:0.25] -; HASWELL-NEXT: xorl $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: xorl %edx, %edi # sched: [1:0.25] -; HASWELL-NEXT: xorl %edi, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: xorl (%rsi), %edi # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xor_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: xorl $665536, %eax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: xorl $665536, %edi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [7:1.00] -; BROADWELL-NEXT: xorl $7, %edi # sched: [1:0.25] -; BROADWELL-NEXT: xorl $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: xorl %edx, %edi # sched: [1:0.25] -; BROADWELL-NEXT: xorl %edi, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: xorl (%rsi), %edi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xor_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: xorl $665536, %eax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: xorl $665536, %edi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [7:1.00] -; SKYLAKE-NEXT: xorl $7, %edi # sched: [1:0.25] -; SKYLAKE-NEXT: xorl $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: xorl %edx, %edi # sched: [1:0.25] -; SKYLAKE-NEXT: xorl %edi, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: xorl (%rsi), %edi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xor_32: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: xorl $665536, %eax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: xorl $665536, %edi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [7:1.00] -; SKX-NEXT: xorl $7, %edi # sched: [1:0.25] -; SKX-NEXT: xorl $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: xorl %edx, %edi # sched: [1:0.25] -; SKX-NEXT: xorl %edi, (%rsi) # sched: [7:1.00] -; SKX-NEXT: xorl (%rsi), %edi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_xor_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: xorl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: xorl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: xorl $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: xorl $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: xorl %edx, %edi # sched: [1:0.50] -; BDVER2-NEXT: xorl %edi, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: xorl (%rsi), %edi # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_xor_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: xorl $665536, %eax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: xorl $665536, %edi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: xorl $7, %edi # sched: [1:0.50] -; BTVER2-NEXT: xorl $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: xorl %edx, %edi # sched: [1:0.50] -; BTVER2-NEXT: xorl %edi, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: xorl (%rsi), %edi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xor_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: xorl $665536, %eax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: xorl $665536, %edi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: xorl $7, %edi # sched: [1:0.25] -; ZNVER1-NEXT: xorl $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: xorl %edx, %edi # sched: [1:0.25] -; ZNVER1-NEXT: xorl %edi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: xorl (%rsi), %edi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "xorl $3, %EAX \0A\09 xorl $3, $0 \0A\09 xorl $3, $2 \0A\09 xorl $4, $0 \0A\09 xorl $4, $2 \0A\09 xorl $1, $0 \0A\09 xorl $0, $2 \0A\09 xorl $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind - ret void -} -define void @test_xor_64(i64 %a0, i64* %a1, i64 %a2) optsize { -; GENERIC-LABEL: test_xor_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: xorq $665536, %rax # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: xorq $665536, %rdi # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [1:0.33] -; GENERIC-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0 -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: xorq $7, %rdi # sched: [1:0.33] -; GENERIC-NEXT: xorq $7, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: xorq %rdx, %rdi # sched: [1:0.33] -; GENERIC-NEXT: xorq %rdi, (%rsi) # sched: [7:1.00] -; GENERIC-NEXT: xorq (%rsi), %rdi # sched: [6:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xor_64: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: xorq $665536, %rax # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: xorq $665536, %rdi # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:0.50] -; ATOM-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0 -; ATOM-NEXT: # sched: [1:1.00] -; ATOM-NEXT: xorq $7, %rdi # sched: [1:0.50] -; ATOM-NEXT: xorq $7, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: xorq %rdx, %rdi # sched: [1:0.50] -; ATOM-NEXT: xorq %rdi, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: xorq (%rsi), %rdi # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xor_64: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: xorq $665536, %rax # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: xorq $665536, %rdi # imm = 0xA27C0 -; SLM-NEXT: # sched: [1:0.50] -; SLM-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0 -; SLM-NEXT: # sched: [5:2.00] -; SLM-NEXT: xorq $7, %rdi # sched: [1:0.50] -; SLM-NEXT: xorq $7, (%rsi) # sched: [5:2.00] -; SLM-NEXT: xorq %rdx, %rdi # sched: [1:0.50] -; SLM-NEXT: xorq %rdi, (%rsi) # sched: [5:2.00] -; SLM-NEXT: xorq (%rsi), %rdi # sched: [4:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_xor_64: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: xorq $665536, %rax # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: xorq $665536, %rdi # imm = 0xA27C0 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0 -; SANDY-NEXT: # sched: [7:1.00] -; SANDY-NEXT: xorq $7, %rdi # sched: [1:0.33] -; SANDY-NEXT: xorq $7, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: xorq %rdx, %rdi # sched: [1:0.33] -; SANDY-NEXT: xorq %rdi, (%rsi) # sched: [7:1.00] -; SANDY-NEXT: xorq (%rsi), %rdi # sched: [6:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_xor_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: xorq $665536, %rax # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: xorq $665536, %rdi # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0 -; HASWELL-NEXT: # sched: [7:1.00] -; HASWELL-NEXT: xorq $7, %rdi # sched: [1:0.25] -; HASWELL-NEXT: xorq $7, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: xorq %rdx, %rdi # sched: [1:0.25] -; HASWELL-NEXT: xorq %rdi, (%rsi) # sched: [7:1.00] -; HASWELL-NEXT: xorq (%rsi), %rdi # sched: [6:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xor_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: xorq $665536, %rax # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: xorq $665536, %rdi # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [1:0.25] -; BROADWELL-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0 -; BROADWELL-NEXT: # sched: [7:1.00] -; BROADWELL-NEXT: xorq $7, %rdi # sched: [1:0.25] -; BROADWELL-NEXT: xorq $7, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: xorq %rdx, %rdi # sched: [1:0.25] -; BROADWELL-NEXT: xorq %rdi, (%rsi) # sched: [7:1.00] -; BROADWELL-NEXT: xorq (%rsi), %rdi # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xor_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: xorq $665536, %rax # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: xorq $665536, %rdi # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [1:0.25] -; SKYLAKE-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0 -; SKYLAKE-NEXT: # sched: [7:1.00] -; SKYLAKE-NEXT: xorq $7, %rdi # sched: [1:0.25] -; SKYLAKE-NEXT: xorq $7, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: xorq %rdx, %rdi # sched: [1:0.25] -; SKYLAKE-NEXT: xorq %rdi, (%rsi) # sched: [7:1.00] -; SKYLAKE-NEXT: xorq (%rsi), %rdi # sched: [6:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xor_64: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: xorq $665536, %rax # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: xorq $665536, %rdi # imm = 0xA27C0 -; SKX-NEXT: # sched: [1:0.25] -; SKX-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0 -; SKX-NEXT: # sched: [7:1.00] -; SKX-NEXT: xorq $7, %rdi # sched: [1:0.25] -; SKX-NEXT: xorq $7, (%rsi) # sched: [7:1.00] -; SKX-NEXT: xorq %rdx, %rdi # sched: [1:0.25] -; SKX-NEXT: xorq %rdi, (%rsi) # sched: [7:1.00] -; SKX-NEXT: xorq (%rsi), %rdi # sched: [6:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-LABEL: test_xor_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: xorq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: xorq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.50] -; BDVER2-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: xorq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: xorq $7, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: xorq %rdx, %rdi # sched: [1:0.50] -; BDVER2-NEXT: xorq %rdi, (%rsi) # sched: [6:1.00] -; BDVER2-NEXT: xorq (%rsi), %rdi # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_xor_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: xorq $665536, %rax # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: xorq $665536, %rdi # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [1:0.50] -; BTVER2-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0 -; BTVER2-NEXT: # sched: [5:1.00] -; BTVER2-NEXT: xorq $7, %rdi # sched: [1:0.50] -; BTVER2-NEXT: xorq $7, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: xorq %rdx, %rdi # sched: [1:0.50] -; BTVER2-NEXT: xorq %rdi, (%rsi) # sched: [5:1.00] -; BTVER2-NEXT: xorq (%rsi), %rdi # sched: [4:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_xor_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: xorq $665536, %rax # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: xorq $665536, %rdi # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [1:0.25] -; ZNVER1-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0 -; ZNVER1-NEXT: # sched: [5:0.50] -; ZNVER1-NEXT: xorq $7, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: xorq $7, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: xorq %rdx, %rdi # sched: [1:0.25] -; ZNVER1-NEXT: xorq %rdi, (%rsi) # sched: [5:0.50] -; ZNVER1-NEXT: xorq (%rsi), %rdi # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm "xorq $3, %RAX \0A\09 xorq $3, $0 \0A\09 xorq $3, $2 \0A\09 xorq $4, $0 \0A\09 xorq $4, $2 \0A\09 xorq $1, $0 \0A\09 xorq $0, $2 \0A\09 xorq $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind - ret void -} Index: test/CodeGen/X86/sha-schedule.ll =================================================================== --- test/CodeGen/X86/sha-schedule.ll +++ test/CodeGen/X86/sha-schedule.ll @@ -1,242 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sha | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=GOLDMONT -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=cannonlake | FileCheck %s --check-prefix=CHECK --check-prefix=CANNONLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -; -; SHA1 -; - -define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_sha1msg1: -; GENERIC: # %bb.0: -; GENERIC-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GOLDMONT-LABEL: test_sha1msg1: -; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [4:1.00] -; GOLDMONT-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [7:1.00] -; GOLDMONT-NEXT: retq # sched: [4:1.00] -; -; CANNONLAKE-LABEL: test_sha1msg1: -; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [4:0.50] -; CANNONLAKE-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [10:0.50] -; CANNONLAKE-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_sha1msg1: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [2:1.00] -; ZNVER1-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [9:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <4 x i32>, <4 x i32>* %a2 - %2 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a0, <4 x i32> %a1) - %3 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %2, <4 x i32> %1) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_sha1msg2: -; GENERIC: # %bb.0: -; GENERIC-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GOLDMONT-LABEL: test_sha1msg2: -; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [4:1.00] -; GOLDMONT-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [7:1.00] -; GOLDMONT-NEXT: retq # sched: [4:1.00] -; -; CANNONLAKE-LABEL: test_sha1msg2: -; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [4:0.50] -; CANNONLAKE-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [10:0.50] -; CANNONLAKE-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_sha1msg2: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [1:0.50] -; ZNVER1-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <4 x i32>, <4 x i32>* %a2 - %2 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a0, <4 x i32> %a1) - %3 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %2, <4 x i32> %1) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_sha1nexte: -; GENERIC: # %bb.0: -; GENERIC-NEXT: sha1nexte %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: sha1nexte (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GOLDMONT-LABEL: test_sha1nexte: -; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: sha1nexte %xmm1, %xmm0 # sched: [4:1.00] -; GOLDMONT-NEXT: sha1nexte (%rdi), %xmm0 # sched: [7:1.00] -; GOLDMONT-NEXT: retq # sched: [4:1.00] -; -; CANNONLAKE-LABEL: test_sha1nexte: -; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 # sched: [4:0.50] -; CANNONLAKE-NEXT: sha1nexte (%rdi), %xmm0 # sched: [10:0.50] -; CANNONLAKE-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_sha1nexte: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: sha1nexte %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: sha1nexte (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <4 x i32>, <4 x i32>* %a2 - %2 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a0, <4 x i32> %a1) - %3 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %2, <4 x i32> %1) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_sha1rnds4: -; GENERIC: # %bb.0: -; GENERIC-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GOLDMONT-LABEL: test_sha1rnds4: -; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:1.00] -; GOLDMONT-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [7:1.00] -; GOLDMONT-NEXT: retq # sched: [4:1.00] -; -; CANNONLAKE-LABEL: test_sha1rnds4: -; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:0.50] -; CANNONLAKE-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:0.50] -; CANNONLAKE-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_sha1rnds4: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [6:1.00] -; ZNVER1-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [13:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <4 x i32>, <4 x i32>* %a2 - %2 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, i8 3) - %3 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %2, <4 x i32> %1, i8 3) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) - -; -; SHA256 -; - -define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_sha256msg1: -; GENERIC: # %bb.0: -; GENERIC-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GOLDMONT-LABEL: test_sha256msg1: -; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [4:1.00] -; GOLDMONT-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [7:1.00] -; GOLDMONT-NEXT: retq # sched: [4:1.00] -; -; CANNONLAKE-LABEL: test_sha256msg1: -; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [4:0.50] -; CANNONLAKE-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [10:0.50] -; CANNONLAKE-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_sha256msg1: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [2:1.00] -; ZNVER1-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [9:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <4 x i32>, <4 x i32>* %a2 - %2 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a0, <4 x i32> %a1) - %3 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %2, <4 x i32> %1) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_sha256msg2: -; GENERIC: # %bb.0: -; GENERIC-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GOLDMONT-LABEL: test_sha256msg2: -; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [4:1.00] -; GOLDMONT-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [7:1.00] -; GOLDMONT-NEXT: retq # sched: [4:1.00] -; -; CANNONLAKE-LABEL: test_sha256msg2: -; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [4:0.50] -; CANNONLAKE-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [10:0.50] -; CANNONLAKE-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_sha256msg2: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <4 x i32>, <4 x i32>* %a2 - %2 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a0, <4 x i32> %a1) - %3 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %2, <4 x i32> %1) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> *%a3) { -; GENERIC-LABEL: test_sha256rnds2: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] -; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00] -; GENERIC-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [11:1.00] -; GENERIC-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; GOLDMONT-LABEL: test_sha256rnds2: -; GOLDMONT: # %bb.0: -; GOLDMONT-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50] -; GOLDMONT-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] -; GOLDMONT-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:1.00] -; GOLDMONT-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [7:1.00] -; GOLDMONT-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50] -; GOLDMONT-NEXT: retq # sched: [4:1.00] -; -; CANNONLAKE-LABEL: test_sha256rnds2: -; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: vmovaps %xmm0, %xmm3 # sched: [1:0.33] -; CANNONLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] -; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.50] -; CANNONLAKE-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:0.50] -; CANNONLAKE-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:0.33] -; CANNONLAKE-NEXT: retq # sched: [7:1.00] -; -; ZNVER1-LABEL: test_sha256rnds2: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovaps %xmm0, %xmm3 # sched: [1:0.25] -; ZNVER1-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:1.00] -; ZNVER1-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [11:1.00] -; ZNVER1-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <4 x i32>, <4 x i32>* %a3 - %2 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) - %3 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %2, <4 x i32> %1, <4 x i32> %a2) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) Index: test/CodeGen/X86/sse-schedule.ll =================================================================== --- test/CodeGen/X86/sse-schedule.ll +++ test/CodeGen/X86/sse-schedule.ll @@ -1,6975 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 - -; FIXME: we should really use -mattr=-sse2 here but some of the comparison tests don't work without access to legal <4 x i32> types. - -define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_addps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_addps: -; ATOM: # %bb.0: -; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: addps (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_addps: -; SLM: # %bb.0: -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: addps (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_addps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_addps: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_addps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_addps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_addps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: addps (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_addps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_addps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_addps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_addps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_addps: -; SKX: # %bb.0: -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_addps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_addps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_addps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: addps (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_addps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_addps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_addps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fadd <4 x float> %a0, %a1 - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = fadd <4 x float> %1, %2 - ret <4 x float> %3 -} - -define float @test_addss(float %a0, float %a1, float *%a2) { -; GENERIC-LABEL: test_addss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: addss (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_addss: -; ATOM: # %bb.0: -; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: addss (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_addss: -; SLM: # %bb.0: -; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: addss (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_addss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_addss: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_addss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: addss (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_addss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_addss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: addss (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_addss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_addss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_addss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_addss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_addss: -; SKX: # %bb.0: -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_addss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: addss (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_addss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_addss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: addss (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_addss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_addss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: addss (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_addss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fadd float %a0, %a1 - %2 = load float, float *%a2, align 4 - %3 = fadd float %1, %2 - ret float %3 -} - -define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_andps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_andps: -; ATOM: # %bb.0: -; ATOM-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: andps (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_andps: -; SLM: # %bb.0: -; SLM-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: andps (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_andps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_andps: -; SANDY: # %bb.0: -; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_andps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_andps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_andps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: andps (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_andps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_andps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_andps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_andps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_andps: -; SKX: # %bb.0: -; SKX-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_andps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: andps %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_andps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_andps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: andps (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_andps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_andps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andps (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_andps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <4 x float> %a0 to <4 x i32> - %2 = bitcast <4 x float> %a1 to <4 x i32> - %3 = and <4 x i32> %1, %2 - %4 = load <4 x float>, <4 x float> *%a2, align 16 - %5 = bitcast <4 x float> %4 to <4 x i32> - %6 = and <4 x i32> %3, %5 - %7 = bitcast <4 x i32> %6 to <4 x float> - ret <4 x float> %7 -} - -define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_andnotps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_andnotps: -; ATOM: # %bb.0: -; ATOM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: andnps (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_andnotps: -; SLM: # %bb.0: -; SLM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: andnps (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_andnotps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_andnotps: -; SANDY: # %bb.0: -; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_andnotps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_andnotps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_andnotps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_andnotps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_andnotps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_andnotps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_andnotps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_andnotps: -; SKX: # %bb.0: -; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_andnotps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_andnotps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_andnotps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_andnotps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_andnotps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_andnotps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <4 x float> %a0 to <4 x i32> - %2 = bitcast <4 x float> %a1 to <4 x i32> - %3 = xor <4 x i32> %1, - %4 = and <4 x i32> %3, %2 - %5 = load <4 x float>, <4 x float> *%a2, align 16 - %6 = bitcast <4 x float> %5 to <4 x i32> - %7 = xor <4 x i32> %4, - %8 = and <4 x i32> %6, %7 - %9 = bitcast <4 x i32> %8 to <4 x float> - ret <4 x float> %9 -} - -define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_cmpps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00] -; GENERIC-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmpps: -; ATOM: # %bb.0: -; ATOM-NEXT: cmpeqps %xmm0, %xmm1 # sched: [5:5.00] -; ATOM-NEXT: cmpeqps (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmpps: -; SLM: # %bb.0: -; SLM-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: cmpeqps (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cmpps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cmpps: -; SANDY: # %bb.0: -; SANDY-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cmpps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cmpps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cmpps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmpps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cmpps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmpps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.50] -; SKYLAKE-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cmpps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.50] -; SKX-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmpps: -; SKX: # %bb.0: -; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.50] -; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cmpps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [2:1.00] -; BDVER2-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cmpps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [2:1.00] -; BDVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cmpps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cmpps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cmpps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cmpps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; ZNVER1-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fcmp oeq <4 x float> %a0, %a1 - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = fcmp oeq <4 x float> %a0, %2 - %4 = sext <4 x i1> %1 to <4 x i32> - %5 = sext <4 x i1> %3 to <4 x i32> - %6 = or <4 x i32> %4, %5 - %7 = bitcast <4 x i32> %6 to <4 x float> - ret <4 x float> %7 -} - -define float @test_cmpss(float %a0, float %a1, float *%a2) { -; GENERIC-LABEL: test_cmpss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmpss: -; ATOM: # %bb.0: -; ATOM-NEXT: cmpeqss %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmpss: -; SLM: # %bb.0: -; SLM-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cmpss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cmpss: -; SANDY: # %bb.0: -; SANDY-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cmpss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cmpss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cmpss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmpss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cmpss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmpss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cmpss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmpss: -; SKX: # %bb.0: -; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cmpss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cmpss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cmpss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cmpss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cmpss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cmpss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <4 x float> undef, float %a0, i32 0 - %2 = insertelement <4 x float> undef, float %a1, i32 0 - %3 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %2, i8 0) - %4 = load float, float *%a2, align 4 - %5 = insertelement <4 x float> undef, float %4, i32 0 - %6 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %3, <4 x float> %5, i8 0) - %7 = extractelement <4 x float> %6, i32 0 - ret float %7 -} -declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone - -define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_comiss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: setnp %al # sched: [1:0.50] -; GENERIC-NEXT: sete %cl # sched: [1:0.50] -; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33] -; GENERIC-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: setnp %al # sched: [1:0.50] -; GENERIC-NEXT: sete %dl # sched: [1:0.50] -; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33] -; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33] -; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_comiss: -; ATOM: # %bb.0: -; ATOM-NEXT: comiss %xmm1, %xmm0 # sched: [9:4.50] -; ATOM-NEXT: setnp %al # sched: [1:0.50] -; ATOM-NEXT: sete %cl # sched: [1:0.50] -; ATOM-NEXT: andb %al, %cl # sched: [1:0.50] -; ATOM-NEXT: comiss (%rdi), %xmm0 # sched: [10:5.00] -; ATOM-NEXT: setnp %al # sched: [1:0.50] -; ATOM-NEXT: sete %dl # sched: [1:0.50] -; ATOM-NEXT: andb %al, %dl # sched: [1:0.50] -; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50] -; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_comiss: -; SLM: # %bb.0: -; SLM-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: setnp %al # sched: [1:0.50] -; SLM-NEXT: sete %cl # sched: [1:0.50] -; SLM-NEXT: andb %al, %cl # sched: [1:0.50] -; SLM-NEXT: comiss (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: setnp %al # sched: [1:0.50] -; SLM-NEXT: sete %dl # sched: [1:0.50] -; SLM-NEXT: andb %al, %dl # sched: [1:0.50] -; SLM-NEXT: orb %cl, %dl # sched: [1:0.50] -; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_comiss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] -; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50] -; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] -; SANDY-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] -; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50] -; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33] -; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] -; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_comiss: -; SANDY: # %bb.0: -; SANDY-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.50] -; SANDY-NEXT: sete %cl # sched: [1:0.50] -; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] -; SANDY-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.50] -; SANDY-NEXT: sete %dl # sched: [1:0.50] -; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] -; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] -; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_comiss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50] -; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; HASWELL-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50] -; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_comiss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %cl # sched: [1:0.50] -; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; HASWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %dl # sched: [1:0.50] -; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] -; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] -; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_comiss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50] -; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50] -; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_comiss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-NEXT: sete %cl # sched: [1:0.50] -; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-NEXT: sete %dl # sched: [1:0.50] -; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] -; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] -; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_comiss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_comiss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-NEXT: sete %cl # sched: [1:0.50] -; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKYLAKE-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-NEXT: sete %dl # sched: [1:0.50] -; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_comiss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKX-SSE-NEXT: sete %cl # sched: [1:0.50] -; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKX-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [7:1.00] -; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKX-SSE-NEXT: sete %dl # sched: [1:0.50] -; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_comiss: -; SKX: # %bb.0: -; SKX-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00] -; SKX-NEXT: setnp %al # sched: [1:0.50] -; SKX-NEXT: sete %cl # sched: [1:0.50] -; SKX-NEXT: andb %al, %cl # sched: [1:0.25] -; SKX-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00] -; SKX-NEXT: setnp %al # sched: [1:0.50] -; SKX-NEXT: sete %dl # sched: [1:0.50] -; SKX-NEXT: andb %al, %dl # sched: [1:0.25] -; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_comiss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_comiss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50] -; BDVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %dl # sched: [1:0.50] -; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.50] -; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_comiss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] -; BTVER2-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_comiss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-NEXT: sete %cl # sched: [1:0.50] -; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50] -; BTVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-NEXT: sete %dl # sched: [1:0.50] -; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50] -; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50] -; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_comiss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_comiss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-NEXT: sete %cl # sched: [1:0.25] -; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25] -; ZNVER1-NEXT: vcomiss (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-NEXT: sete %dl # sched: [1:0.25] -; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25] -; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25] -; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) - %2 = load <4 x float>, <4 x float> *%a2, align 4 - %3 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %2) - %4 = or i32 %1, %3 - ret i32 %4 -} -declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone - -define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { -; GENERIC-LABEL: test_cvtsi2ss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00] -; GENERIC-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00] -; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtsi2ss: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [6:3.00] -; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtsi2ss: -; SLM: # %bb.0: -; SLM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:0.50] -; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtsi2ss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00] -; SANDY-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00] -; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtsi2ss: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00] -; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtsi2ss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtsi2ss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtsi2ss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] -; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtsi2ss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtsi2ss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] -; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtsi2ss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtsi2ss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtsi2ss: -; SKX: # %bb.0: -; SKX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtsi2ss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtsi2ss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtsi2ss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [14:1.00] -; BTVER2-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [9:1.00] -; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtsi2ss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [9:1.00] -; BTVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [14:1.00] -; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtsi2ss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtsi2ss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sitofp i32 %a0 to float - %2 = load i32, i32 *%a1, align 4 - %3 = sitofp i32 %2 to float - %4 = fadd float %1, %3 - ret float %4 -} - -define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { -; GENERIC-LABEL: test_cvtsi2ssq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00] -; GENERIC-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00] -; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtsi2ssq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:3.00] -; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtsi2ssq: -; SLM: # %bb.0: -; SLM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [4:0.50] -; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtsi2ssq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00] -; SANDY-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00] -; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtsi2ssq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00] -; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtsi2ssq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00] -; HASWELL-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtsi2ssq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtsi2ssq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00] -; BROADWELL-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] -; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtsi2ssq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] -; BROADWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtsi2ssq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00] -; SKYLAKE-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] -; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtsi2ssq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtsi2ssq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00] -; SKX-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtsi2ssq: -; SKX: # %bb.0: -; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00] -; SKX-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtsi2ssq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [13:1.00] -; BDVER2-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtsi2ssq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtsi2ssq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [14:1.00] -; BTVER2-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [9:1.00] -; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtsi2ssq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [9:1.00] -; BTVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [14:1.00] -; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtsi2ssq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtsi2ssq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sitofp i64 %a0 to float - %2 = load i64, i64 *%a1, align 8 - %3 = sitofp i64 %2 to float - %4 = fadd float %1, %3 - ret float %4 -} - -define i32 @test_cvtss2si(float %a0, float *%a1) { -; GENERIC-LABEL: test_cvtss2si: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00] -; GENERIC-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtss2si: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtss2si (%rdi), %eax # sched: [9:4.50] -; ATOM-NEXT: cvtss2si %xmm0, %ecx # sched: [8:4.00] -; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtss2si: -; SLM: # %bb.0: -; SLM-NEXT: cvtss2si (%rdi), %eax # sched: [7:1.00] -; SLM-NEXT: cvtss2si %xmm0, %ecx # sched: [4:0.50] -; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtss2si: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00] -; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtss2si: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00] -; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtss2si: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00] -; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtss2si: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [9:1.00] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtss2si: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtss2si: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [9:1.00] -; BROADWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtss2si: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtss2si: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvtss2si (%rdi), %eax # sched: [11:1.00] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtss2si: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [6:1.00] -; SKX-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [11:1.00] -; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtss2si: -; SKX: # %bb.0: -; SKX-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00] -; SKX-NEXT: vcvtss2si (%rdi), %eax # sched: [11:1.00] -; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtss2si: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [18:1.00] -; BDVER2-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [13:1.00] -; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtss2si: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [18:1.00] -; BDVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [13:1.00] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtss2si: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [12:1.00] -; BTVER2-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [7:1.00] -; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtss2si: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [12:1.00] -; BTVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [7:1.00] -; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtss2si: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtss2si: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtss2si (%rdi), %eax # sched: [12:1.00] -; ZNVER1-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <4 x float> undef, float %a0, i32 0 - %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %1) - %3 = load float, float *%a1, align 4 - %4 = insertelement <4 x float> undef, float %3, i32 0 - %5 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %4) - %6 = add i32 %2, %5 - ret i32 %6 -} -declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone - -define i64 @test_cvtss2siq(float %a0, float *%a1) { -; GENERIC-LABEL: test_cvtss2siq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00] -; GENERIC-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtss2siq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtss2si (%rdi), %rax # sched: [10:5.00] -; ATOM-NEXT: cvtss2si %xmm0, %rcx # sched: [9:4.50] -; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtss2siq: -; SLM: # %bb.0: -; SLM-NEXT: cvtss2si (%rdi), %rax # sched: [7:1.00] -; SLM-NEXT: cvtss2si %xmm0, %rcx # sched: [4:0.50] -; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtss2siq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00] -; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtss2siq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00] -; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtss2siq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00] -; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtss2siq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [9:1.00] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtss2siq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtss2siq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [9:1.00] -; BROADWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtss2siq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtss2siq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtss2siq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [7:1.00] -; SKX-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [11:1.00] -; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtss2siq: -; SKX: # %bb.0: -; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [7:1.00] -; SKX-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00] -; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtss2siq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [18:1.00] -; BDVER2-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [13:1.00] -; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtss2siq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [18:1.00] -; BDVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [13:1.00] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtss2siq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [12:1.00] -; BTVER2-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [7:1.00] -; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtss2siq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [12:1.00] -; BTVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [7:1.00] -; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtss2siq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtss2siq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtss2si (%rdi), %rax # sched: [12:1.00] -; ZNVER1-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <4 x float> undef, float %a0, i32 0 - %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %1) - %3 = load float, float *%a1, align 4 - %4 = insertelement <4 x float> undef, float %3, i32 0 - %5 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %4) - %6 = add i64 %2, %5 - ret i64 %6 -} -declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone - -define i32 @test_cvttss2si(float %a0, float *%a1) { -; GENERIC-LABEL: test_cvttss2si: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00] -; GENERIC-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvttss2si: -; ATOM: # %bb.0: -; ATOM-NEXT: cvttss2si (%rdi), %eax # sched: [9:4.50] -; ATOM-NEXT: cvttss2si %xmm0, %ecx # sched: [8:4.00] -; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvttss2si: -; SLM: # %bb.0: -; SLM-NEXT: cvttss2si (%rdi), %eax # sched: [7:1.00] -; SLM-NEXT: cvttss2si %xmm0, %ecx # sched: [4:0.50] -; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvttss2si: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00] -; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvttss2si: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00] -; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvttss2si: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00] -; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvttss2si: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [9:1.00] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvttss2si: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvttss2si: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [9:1.00] -; BROADWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvttss2si: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvttss2si: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00] -; SKYLAKE-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvttss2si: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [6:1.00] -; SKX-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [11:1.00] -; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvttss2si: -; SKX: # %bb.0: -; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [6:1.00] -; SKX-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00] -; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvttss2si: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [18:1.00] -; BDVER2-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [13:1.00] -; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvttss2si: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [18:1.00] -; BDVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [13:1.00] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvttss2si: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [12:1.00] -; BTVER2-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [7:1.00] -; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvttss2si: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [12:1.00] -; BTVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00] -; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvttss2si: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvttss2si: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvttss2si (%rdi), %eax # sched: [12:1.00] -; ZNVER1-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fptosi float %a0 to i32 - %2 = load float, float *%a1, align 4 - %3 = fptosi float %2 to i32 - %4 = add i32 %1, %3 - ret i32 %4 -} - -define i64 @test_cvttss2siq(float %a0, float *%a1) { -; GENERIC-LABEL: test_cvttss2siq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00] -; GENERIC-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvttss2siq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvttss2si (%rdi), %rax # sched: [10:5.00] -; ATOM-NEXT: cvttss2si %xmm0, %rcx # sched: [9:4.50] -; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvttss2siq: -; SLM: # %bb.0: -; SLM-NEXT: cvttss2si (%rdi), %rax # sched: [7:1.00] -; SLM-NEXT: cvttss2si %xmm0, %rcx # sched: [4:0.50] -; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvttss2siq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00] -; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvttss2siq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00] -; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvttss2siq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [10:1.00] -; HASWELL-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvttss2siq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [9:1.00] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvttss2siq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [10:1.00] -; BROADWELL-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvttss2siq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [9:1.00] -; BROADWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvttss2siq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00] -; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvttss2siq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00] -; SKYLAKE-NEXT: vcvttss2si (%rdi), %rax # sched: [11:1.00] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvttss2siq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [7:1.00] -; SKX-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00] -; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvttss2siq: -; SKX: # %bb.0: -; SKX-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00] -; SKX-NEXT: vcvttss2si (%rdi), %rax # sched: [11:1.00] -; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvttss2siq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [18:1.00] -; BDVER2-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [13:1.00] -; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvttss2siq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [18:1.00] -; BDVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [13:1.00] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvttss2siq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00] -; BTVER2-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [7:1.00] -; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvttss2siq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [12:1.00] -; BTVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00] -; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvttss2siq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvttss2siq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvttss2si (%rdi), %rax # sched: [12:1.00] -; ZNVER1-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fptosi float %a0 to i64 - %2 = load float, float *%a1, align 4 - %3 = fptosi float %2 to i64 - %4 = add i64 %1, %3 - ret i64 %4 -} - -define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_divps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: divps %xmm1, %xmm0 # sched: [14:14.00] -; GENERIC-NEXT: divps (%rdi), %xmm0 # sched: [20:14.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_divps: -; ATOM: # %bb.0: -; ATOM-NEXT: divps %xmm1, %xmm0 # sched: [70:35.00] -; ATOM-NEXT: divps (%rdi), %xmm0 # sched: [70:35.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_divps: -; SLM: # %bb.0: -; SLM-NEXT: divps %xmm1, %xmm0 # sched: [39:39.00] -; SLM-NEXT: divps (%rdi), %xmm0 # sched: [42:39.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_divps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: divps %xmm1, %xmm0 # sched: [14:14.00] -; SANDY-SSE-NEXT: divps (%rdi), %xmm0 # sched: [20:14.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_divps: -; SANDY: # %bb.0: -; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:14.00] -; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:14.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_divps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: divps %xmm1, %xmm0 # sched: [13:7.00] -; HASWELL-SSE-NEXT: divps (%rdi), %xmm0 # sched: [19:7.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_divps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:7.00] -; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [19:7.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_divps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:5.00] -; BROADWELL-SSE-NEXT: divps (%rdi), %xmm0 # sched: [16:5.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_divps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:5.00] -; BROADWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:5.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_divps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:3.00] -; SKYLAKE-SSE-NEXT: divps (%rdi), %xmm0 # sched: [17:5.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_divps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:3.00] -; SKYLAKE-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:5.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_divps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:3.00] -; SKX-SSE-NEXT: divps (%rdi), %xmm0 # sched: [17:5.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_divps: -; SKX: # %bb.0: -; SKX-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:3.00] -; SKX-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:5.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_divps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: divps %xmm1, %xmm0 # sched: [9:9.50] -; BDVER2-SSE-NEXT: divps (%rdi), %xmm0 # sched: [14:9.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_divps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [9:9.50] -; BDVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [14:9.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_divps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: divps %xmm1, %xmm0 # sched: [19:19.00] -; BTVER2-SSE-NEXT: divps (%rdi), %xmm0 # sched: [24:19.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_divps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00] -; BTVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [24:19.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_divps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: divps %xmm1, %xmm0 # sched: [15:1.00] -; ZNVER1-SSE-NEXT: divps (%rdi), %xmm0 # sched: [22:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_divps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [15:1.00] -; ZNVER1-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [22:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fdiv <4 x float> %a0, %a1 - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = fdiv <4 x float> %1, %2 - ret <4 x float> %3 -} - -define float @test_divss(float %a0, float %a1, float *%a2) { -; GENERIC-LABEL: test_divss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: divss %xmm1, %xmm0 # sched: [14:14.00] -; GENERIC-NEXT: divss (%rdi), %xmm0 # sched: [20:14.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_divss: -; ATOM: # %bb.0: -; ATOM-NEXT: divss %xmm1, %xmm0 # sched: [34:17.00] -; ATOM-NEXT: divss (%rdi), %xmm0 # sched: [34:17.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_divss: -; SLM: # %bb.0: -; SLM-NEXT: divss %xmm1, %xmm0 # sched: [19:17.00] -; SLM-NEXT: divss (%rdi), %xmm0 # sched: [22:17.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_divss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: divss %xmm1, %xmm0 # sched: [14:14.00] -; SANDY-SSE-NEXT: divss (%rdi), %xmm0 # sched: [20:14.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_divss: -; SANDY: # %bb.0: -; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:14.00] -; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:14.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_divss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: divss %xmm1, %xmm0 # sched: [13:7.00] -; HASWELL-SSE-NEXT: divss (%rdi), %xmm0 # sched: [18:7.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_divss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:7.00] -; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [18:7.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_divss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:3.00] -; BROADWELL-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:5.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_divss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:3.00] -; BROADWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:5.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_divss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:3.00] -; SKYLAKE-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:3.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_divss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:3.00] -; SKYLAKE-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:3.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_divss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:3.00] -; SKX-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:3.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_divss: -; SKX: # %bb.0: -; SKX-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:3.00] -; SKX-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:3.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_divss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: divss %xmm1, %xmm0 # sched: [9:9.50] -; BDVER2-SSE-NEXT: divss (%rdi), %xmm0 # sched: [14:9.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_divss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [9:9.50] -; BDVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [14:9.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_divss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: divss %xmm1, %xmm0 # sched: [19:19.00] -; BTVER2-SSE-NEXT: divss (%rdi), %xmm0 # sched: [24:19.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_divss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00] -; BTVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [24:19.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_divss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: divss %xmm1, %xmm0 # sched: [15:1.00] -; ZNVER1-SSE-NEXT: divss (%rdi), %xmm0 # sched: [22:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_divss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [15:1.00] -; ZNVER1-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [22:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fdiv float %a0, %a1 - %2 = load float, float *%a2, align 4 - %3 = fdiv float %1, %2 - ret float %3 -} - -define void @test_ldmxcsr(i32 %a0) { -; GENERIC-LABEL: test_ldmxcsr: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; GENERIC-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_ldmxcsr: -; ATOM: # %bb.0: -; ATOM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; ATOM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:2.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_ldmxcsr: -; SLM: # %bb.0: -; SLM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SLM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_ldmxcsr: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SANDY-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_ldmxcsr: -; SANDY: # %bb.0: -; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_ldmxcsr: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; HASWELL-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_ldmxcsr: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_ldmxcsr: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ldmxcsr: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BROADWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_ldmxcsr: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_ldmxcsr: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SKYLAKE-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_ldmxcsr: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SKX-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_ldmxcsr: -; SKX: # %bb.0: -; SKX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SKX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_ldmxcsr: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BDVER2-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_ldmxcsr: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BDVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_ldmxcsr: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BTVER2-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_ldmxcsr: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BTVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_ldmxcsr: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_ldmxcsr: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50] -; ZNVER1-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = alloca i32, align 4 - %2 = bitcast i32* %1 to i8* - store i32 %a0, i32* %1 - call void @llvm.x86.sse.ldmxcsr(i8* %2) - ret void -} -declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone - -define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_maxps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_maxps: -; ATOM: # %bb.0: -; ATOM-NEXT: maxps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: maxps (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_maxps: -; SLM: # %bb.0: -; SLM-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: maxps (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_maxps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_maxps: -; SANDY: # %bb.0: -; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_maxps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_maxps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_maxps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_maxps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_maxps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_maxps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_maxps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_maxps: -; SKX: # %bb.0: -; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_maxps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_maxps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_maxps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_maxps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_maxps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_maxps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %1, <4 x float> %2) - ret <4 x float> %3 -} -declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone - -define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_maxss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_maxss: -; ATOM: # %bb.0: -; ATOM-NEXT: maxss %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: maxss (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_maxss: -; SLM: # %bb.0: -; SLM-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: maxss (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_maxss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_maxss: -; SANDY: # %bb.0: -; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_maxss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_maxss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_maxss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_maxss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_maxss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_maxss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_maxss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_maxss: -; SKX: # %bb.0: -; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_maxss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_maxss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_maxss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_maxss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_maxss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_maxss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %2) - ret <4 x float> %3 -} -declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone - -define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_minps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_minps: -; ATOM: # %bb.0: -; ATOM-NEXT: minps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: minps (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_minps: -; SLM: # %bb.0: -; SLM-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: minps (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_minps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_minps: -; SANDY: # %bb.0: -; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_minps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_minps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_minps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: minps (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_minps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_minps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_minps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_minps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_minps: -; SKX: # %bb.0: -; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_minps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: minps %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: minps (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_minps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_minps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: minps %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: minps (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_minps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_minps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_minps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %1, <4 x float> %2) - ret <4 x float> %3 -} -declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone - -define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_minss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_minss: -; ATOM: # %bb.0: -; ATOM-NEXT: minss %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: minss (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_minss: -; SLM: # %bb.0: -; SLM-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: minss (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_minss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_minss: -; SANDY: # %bb.0: -; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_minss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: minss (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_minss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_minss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: minss (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_minss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_minss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_minss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_minss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_minss: -; SKX: # %bb.0: -; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_minss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: minss %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: minss (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_minss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_minss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: minss %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: minss (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_minss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_minss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: minss (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_minss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %2) - ret <4 x float> %3 -} -declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone - -define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_movaps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] -; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movaps: -; ATOM: # %bb.0: -; ATOM-NEXT: movaps (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movaps: -; SLM: # %bb.0: -; SLM-NEXT: movaps (%rdi), %xmm0 # sched: [3:1.00] -; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movaps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movaps: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movaps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movaps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movaps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movaps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movaps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movaps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movaps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movaps: -; SKX: # %bb.0: -; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movaps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movaps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movaps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movaps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movaps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movaps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovaps (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <4 x float>, <4 x float> *%a0, align 16 - %2 = fadd <4 x float> %1, %1 - store <4 x float> %2, <4 x float> *%a1, align 16 - ret void -} - -; TODO (v)movhlps - -define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) { -; GENERIC-LABEL: test_movhlps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movhlps: -; ATOM: # %bb.0: -; ATOM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movhlps: -; SLM: # %bb.0: -; SLM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movhlps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movhlps: -; SANDY: # %bb.0: -; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movhlps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movhlps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movhlps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movhlps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movhlps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movhlps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movhlps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movhlps: -; SKX: # %bb.0: -; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movhlps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movhlps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movhlps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movhlps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movhlps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movhlps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> - ret <4 x float> %1 -} - -; TODO (v)movhps - -define <4 x float> @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { -; GENERIC-LABEL: test_movhps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movhps: -; ATOM: # %bb.0: -; ATOM-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] -; ATOM-NEXT: addps %xmm1, %xmm2 # sched: [5:5.00] -; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: movhps %xmm2, (%rdi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movhps: -; SLM: # %bb.0: -; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00] -; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movhps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movhps: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movhps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movhps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movhps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movhps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movhps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movhps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movhps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00] -; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movhps: -; SKX: # %bb.0: -; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movhps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movhps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movhps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movhps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movhps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movhps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast x86_mmx* %a2 to <2 x float>* - %2 = load <2 x float>, <2 x float> *%1, align 8 - %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> - %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> - %5 = fadd <4 x float> %a0, %4 - %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> - store <2 x float> %6, <2 x float>* %1 - ret <4 x float> %4 -} - -; TODO (v)movlhps - -define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) { -; GENERIC-LABEL: test_movlhps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movlhps: -; ATOM: # %bb.0: -; ATOM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movlhps: -; SLM: # %bb.0: -; SLM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movlhps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movlhps: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movlhps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movlhps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movlhps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movlhps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movlhps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movlhps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movlhps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movlhps: -; SKX: # %bb.0: -; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movlhps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movlhps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50] -; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movlhps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movlhps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movlhps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movlhps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> - %2 = fadd <4 x float> %a1, %1 - ret <4 x float> %2 -} - -define <4 x float> @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { -; GENERIC-LABEL: test_movlps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movlps: -; ATOM: # %bb.0: -; ATOM-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] -; ATOM-NEXT: addps %xmm1, %xmm2 # sched: [5:5.00] -; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: movlps %xmm2, (%rdi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movlps: -; SLM: # %bb.0: -; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00] -; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movlps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movlps: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movlps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movlps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movlps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movlps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movlps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movlps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movlps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00] -; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movlps: -; SKX: # %bb.0: -; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movlps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movlps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movlps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movlps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movlps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movlps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast x86_mmx* %a2 to <2 x float>* - %2 = load <2 x float>, <2 x float> *%1, align 8 - %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> - %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> - %5 = fadd <4 x float> %a0, %4 - %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> - store <2 x float> %6, <2 x float>* %1 - ret <4 x float> %4 -} - -define i32 @test_movmskps(<4 x float> %a0) { -; GENERIC-LABEL: test_movmskps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movmskps %xmm0, %eax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movmskps: -; ATOM: # %bb.0: -; ATOM-NEXT: movmskps %xmm0, %eax # sched: [3:3.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movmskps: -; SLM: # %bb.0: -; SLM-NEXT: movmskps %xmm0, %eax # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movmskps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movmskps: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movmskps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movmskps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movmskps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movmskps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movmskps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movmskps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movmskps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movmskps: -; SKX: # %bb.0: -; SKX-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movmskps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movmskps %xmm0, %eax # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movmskps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovmskps %xmm0, %eax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movmskps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movmskps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movmskps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movmskps %xmm0, %eax # sched: [1:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movmskps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovmskps %xmm0, %eax # sched: [1:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) - ret i32 %1 -} -declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone - -define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_movntps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movntps: -; ATOM: # %bb.0: -; ATOM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movntps: -; SLM: # %bb.0: -; SLM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movntps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movntps: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movntps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movntps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movntps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movntps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movntps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movntps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movntps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movntps: -; SKX: # %bb.0: -; SKX-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movntps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movntps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movntps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movntps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movntps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movntps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - store <4 x float> %a0, <4 x float> *%a1, align 16, !nontemporal !0 - ret void -} - -define void @test_movss_mem(float* %a0, float* %a1) { -; GENERIC-LABEL: test_movss_mem: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; GENERIC-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movss_mem: -; ATOM: # %bb.0: -; ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:1.00] -; ATOM-NEXT: addss %xmm0, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movss_mem: -; SLM: # %bb.0: -; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00] -; SLM-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movss_mem: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movss_mem: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movss_mem: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movss_mem: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movss_mem: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; BROADWELL-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movss_mem: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; BROADWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movss_mem: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKYLAKE-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movss_mem: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKYLAKE-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movss_mem: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movss_mem: -; SKX: # %bb.0: -; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movss_mem: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-SSE-NEXT: addss %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movss %xmm0, (%rsi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movss_mem: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movss_mem: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movss %xmm0, (%rsi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movss_mem: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movss_mem: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movss_mem: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovss %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load float, float* %a0, align 1 - %2 = fadd float %1, %1 - store float %2, float *%a1, align 1 - ret void -} - -define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) { -; GENERIC-LABEL: test_movss_reg: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movss_reg: -; ATOM: # %bb.0: -; ATOM-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movss_reg: -; SLM: # %bb.0: -; SLM-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movss_reg: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movss_reg: -; SANDY: # %bb.0: -; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movss_reg: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movss_reg: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movss_reg: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movss_reg: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movss_reg: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movss_reg: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movss_reg: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movss_reg: -; SKX: # %bb.0: -; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movss_reg: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movss_reg: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movss_reg: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movss_reg: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movss_reg: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movss_reg: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> - ret <4 x float> %1 -} - -define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_movups: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] -; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movups: -; ATOM: # %bb.0: -; ATOM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: movups %xmm0, (%rsi) # sched: [2:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movups: -; SLM: # %bb.0: -; SLM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.00] -; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movups: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movups: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movups: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movups: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movups: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movups: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movups: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movups: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movups: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movups: -; SKX: # %bb.0: -; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movups: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movups: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movups: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movups: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movups: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movups (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movups: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovups (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovups %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <4 x float>, <4 x float> *%a0, align 1 - %2 = fadd <4 x float> %1, %1 - store <4 x float> %2, <4 x float> *%a1, align 1 - ret void -} - -define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_mulps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_mulps: -; ATOM: # %bb.0: -; ATOM-NEXT: mulps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: mulps (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_mulps: -; SLM: # %bb.0: -; SLM-NEXT: mulps %xmm1, %xmm0 # sched: [5:2.00] -; SLM-NEXT: mulps (%rdi), %xmm0 # sched: [8:2.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_mulps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_mulps: -; SANDY: # %bb.0: -; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_mulps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [11:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_mulps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_mulps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [3:0.50] -; BROADWELL-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [8:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mulps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BROADWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_mulps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mulps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_mulps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_mulps: -; SKX: # %bb.0: -; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_mulps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_mulps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_mulps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_mulps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_mulps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [3:0.50] -; ZNVER1-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_mulps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; ZNVER1-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fmul <4 x float> %a0, %a1 - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = fmul <4 x float> %1, %2 - ret <4 x float> %3 -} - -define float @test_mulss(float %a0, float %a1, float *%a2) { -; GENERIC-LABEL: test_mulss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_mulss: -; ATOM: # %bb.0: -; ATOM-NEXT: mulss %xmm1, %xmm0 # sched: [4:4.00] -; ATOM-NEXT: mulss (%rdi), %xmm0 # sched: [4:4.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_mulss: -; SLM: # %bb.0: -; SLM-NEXT: mulss %xmm1, %xmm0 # sched: [5:2.00] -; SLM-NEXT: mulss (%rdi), %xmm0 # sched: [8:2.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_mulss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_mulss: -; SANDY: # %bb.0: -; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_mulss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [10:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_mulss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_mulss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [3:0.50] -; BROADWELL-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [8:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mulss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BROADWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_mulss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mulss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_mulss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_mulss: -; SKX: # %bb.0: -; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_mulss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_mulss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_mulss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_mulss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_mulss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [3:0.50] -; ZNVER1-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [10:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_mulss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; ZNVER1-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fmul float %a0, %a1 - %2 = load float, float *%a2, align 4 - %3 = fmul float %1, %2 - ret float %3 -} - -define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_orps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_orps: -; ATOM: # %bb.0: -; ATOM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: orps (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_orps: -; SLM: # %bb.0: -; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: orps (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_orps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_orps: -; SANDY: # %bb.0: -; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_orps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_orps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_orps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: orps (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_orps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_orps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_orps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_orps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_orps: -; SKX: # %bb.0: -; SKX-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_orps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_orps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_orps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: orps (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_orps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_orps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: orps (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_orps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <4 x float> %a0 to <4 x i32> - %2 = bitcast <4 x float> %a1 to <4 x i32> - %3 = or <4 x i32> %1, %2 - %4 = load <4 x float>, <4 x float> *%a2, align 16 - %5 = bitcast <4 x float> %4 to <4 x i32> - %6 = or <4 x i32> %3, %5 - %7 = bitcast <4 x i32> %6 to <4 x float> - ret <4 x float> %7 -} - -define void @test_prefetch(i8* %a0) optsize { -; GENERIC-LABEL: test_prefetch: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: prefetchnta (%rdi) # sched: [5:0.50] -; GENERIC-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] -; GENERIC-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] -; GENERIC-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_prefetch: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: prefetchnta (%rdi) # sched: [1:1.00] -; ATOM-NEXT: prefetcht0 (%rdi) # sched: [1:1.00] -; ATOM-NEXT: prefetcht1 (%rdi) # sched: [1:1.00] -; ATOM-NEXT: prefetcht2 (%rdi) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_prefetch: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: prefetchnta (%rdi) # sched: [3:1.00] -; SLM-NEXT: prefetcht0 (%rdi) # sched: [3:1.00] -; SLM-NEXT: prefetcht1 (%rdi) # sched: [3:1.00] -; SLM-NEXT: prefetcht2 (%rdi) # sched: [3:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_prefetch: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: #APP -; SANDY-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] -; SANDY-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] -; SANDY-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] -; SANDY-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] -; SANDY-SSE-NEXT: #NO_APP -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_prefetch: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: prefetchnta (%rdi) # sched: [5:0.50] -; SANDY-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] -; SANDY-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] -; SANDY-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_prefetch: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: #APP -; HASWELL-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] -; HASWELL-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] -; HASWELL-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] -; HASWELL-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] -; HASWELL-SSE-NEXT: #NO_APP -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_prefetch: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: prefetchnta (%rdi) # sched: [5:0.50] -; HASWELL-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] -; HASWELL-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] -; HASWELL-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_prefetch: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: #APP -; BROADWELL-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] -; BROADWELL-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] -; BROADWELL-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] -; BROADWELL-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] -; BROADWELL-SSE-NEXT: #NO_APP -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_prefetch: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: prefetchnta (%rdi) # sched: [5:0.50] -; BROADWELL-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] -; BROADWELL-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] -; BROADWELL-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_prefetch: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: #APP -; SKYLAKE-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] -; SKYLAKE-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] -; SKYLAKE-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] -; SKYLAKE-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] -; SKYLAKE-SSE-NEXT: #NO_APP -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_prefetch: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] -; SKYLAKE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] -; SKYLAKE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] -; SKYLAKE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_prefetch: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: #APP -; SKX-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] -; SKX-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] -; SKX-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] -; SKX-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] -; SKX-SSE-NEXT: #NO_APP -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_prefetch: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: prefetchnta (%rdi) # sched: [5:0.50] -; SKX-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] -; SKX-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] -; SKX-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_prefetch: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: #APP -; BDVER2-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] -; BDVER2-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] -; BDVER2-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] -; BDVER2-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] -; BDVER2-SSE-NEXT: #NO_APP -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_prefetch: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: prefetchnta (%rdi) # sched: [5:0.50] -; BDVER2-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] -; BDVER2-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] -; BDVER2-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_prefetch: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: #APP -; BTVER2-SSE-NEXT: prefetchnta (%rdi) # sched: [3:1.00] -; BTVER2-SSE-NEXT: prefetcht0 (%rdi) # sched: [3:1.00] -; BTVER2-SSE-NEXT: prefetcht1 (%rdi) # sched: [3:1.00] -; BTVER2-SSE-NEXT: prefetcht2 (%rdi) # sched: [3:1.00] -; BTVER2-SSE-NEXT: #NO_APP -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_prefetch: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: prefetchnta (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: prefetcht0 (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: prefetcht1 (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: prefetcht2 (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_prefetch: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: #APP -; ZNVER1-SSE-NEXT: prefetchnta (%rdi) # sched: [8:0.50] -; ZNVER1-SSE-NEXT: prefetcht0 (%rdi) # sched: [8:0.50] -; ZNVER1-SSE-NEXT: prefetcht1 (%rdi) # sched: [8:0.50] -; ZNVER1-SSE-NEXT: prefetcht2 (%rdi) # sched: [8:0.50] -; ZNVER1-SSE-NEXT: #NO_APP -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_prefetch: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: prefetchnta (%rdi) # sched: [8:0.50] -; ZNVER1-NEXT: prefetcht0 (%rdi) # sched: [8:0.50] -; ZNVER1-NEXT: prefetcht1 (%rdi) # sched: [8:0.50] -; ZNVER1-NEXT: prefetcht2 (%rdi) # sched: [8:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void asm sideeffect "prefetchnta $0 \0A\09 prefetcht0 $0 \0A\09 prefetcht1 $0 \0A\09 prefetcht2 $0", "*m"(i8 *%a0) - ret void -} - -define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_rcpps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00] -; GENERIC-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_rcpps: -; ATOM: # %bb.0: -; ATOM-NEXT: rcpps (%rdi), %xmm1 # sched: [10:5.00] -; ATOM-NEXT: rcpps %xmm0, %xmm0 # sched: [9:4.50] -; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] -; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_rcpps: -; SLM: # %bb.0: -; SLM-NEXT: rcpps (%rdi), %xmm1 # sched: [8:1.00] -; SLM-NEXT: rcpps %xmm0, %xmm0 # sched: [5:1.00] -; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_rcpps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00] -; SANDY-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_rcpps: -; SANDY: # %bb.0: -; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_rcpps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_rcpps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_rcpps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rcpps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_rcpps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [4:1.00] -; SKYLAKE-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rcpps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_rcpps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [4:1.00] -; SKX-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rcpps: -; SKX: # %bb.0: -; SKX-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00] -; SKX-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_rcpps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_rcpps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00] -; BDVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_rcpps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_rcpps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [7:1.00] -; BTVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_rcpps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:0.50] -; ZNVER1-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [12:0.50] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_rcpps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vrcpps (%rdi), %xmm1 # sched: [12:0.50] -; ZNVER1-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:0.50] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) - %2 = load <4 x float>, <4 x float> *%a1, align 16 - %3 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %2) - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} -declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone - -; TODO - rcpss_m - -define <4 x float> @test_rcpss(float %a0, float *%a1) { -; GENERIC-LABEL: test_rcpss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; GENERIC-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_rcpss: -; ATOM: # %bb.0: -; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00] -; ATOM-NEXT: rcpss %xmm0, %xmm0 # sched: [4:4.00] -; ATOM-NEXT: rcpss %xmm1, %xmm1 # sched: [4:4.00] -; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_rcpss: -; SLM: # %bb.0: -; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00] -; SLM-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] -; SLM-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_rcpss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_rcpss: -; SANDY: # %bb.0: -; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_rcpss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_rcpss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_rcpss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; BROADWELL-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rcpss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; BROADWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_rcpss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKYLAKE-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [4:1.00] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rcpss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKYLAKE-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_rcpss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [4:1.00] -; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [4:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rcpss: -; SKX: # %bb.0: -; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_rcpss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_rcpss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_rcpss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [2:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_rcpss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [2:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_rcpss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:0.50] -; ZNVER1-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:0.50] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_rcpss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:0.50] -; ZNVER1-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:0.50] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <4 x float> undef, float %a0, i32 0 - %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %1) - %3 = load float, float *%a1, align 4 - %4 = insertelement <4 x float> undef, float %3, i32 0 - %5 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4) - %6 = fadd <4 x float> %2, %5 - ret <4 x float> %6 -} -declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone - -define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_rsqrtps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00] -; GENERIC-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_rsqrtps: -; ATOM: # %bb.0: -; ATOM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [10:5.00] -; ATOM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [9:4.50] -; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] -; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_rsqrtps: -; SLM: # %bb.0: -; SLM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [8:1.00] -; SLM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [5:1.00] -; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_rsqrtps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00] -; SANDY-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_rsqrtps: -; SANDY: # %bb.0: -; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_rsqrtps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_rsqrtps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_rsqrtps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rsqrtps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_rsqrtps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [4:1.00] -; SKYLAKE-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rsqrtps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_rsqrtps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [4:1.00] -; SKX-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rsqrtps: -; SKX: # %bb.0: -; SKX-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00] -; SKX-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_rsqrtps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_rsqrtps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00] -; BDVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_rsqrtps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_rsqrtps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [7:1.00] -; BTVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_rsqrtps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:0.50] -; ZNVER1-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [12:0.50] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_rsqrtps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [12:0.50] -; ZNVER1-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:0.50] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) - %2 = load <4 x float>, <4 x float> *%a1, align 16 - %3 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %2) - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} -declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone - -; TODO - rsqrtss_m - -define <4 x float> @test_rsqrtss(float %a0, float *%a1) { -; GENERIC-LABEL: test_rsqrtss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; GENERIC-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_rsqrtss: -; ATOM: # %bb.0: -; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00] -; ATOM-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:4.00] -; ATOM-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:4.00] -; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_rsqrtss: -; SLM: # %bb.0: -; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00] -; SLM-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] -; SLM-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_rsqrtss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_rsqrtss: -; SANDY: # %bb.0: -; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_rsqrtss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_rsqrtss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_rsqrtss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; BROADWELL-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_rsqrtss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; BROADWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_rsqrtss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKYLAKE-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:1.00] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_rsqrtss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKYLAKE-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_rsqrtss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:1.00] -; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_rsqrtss: -; SKX: # %bb.0: -; SKX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_rsqrtss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_rsqrtss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_rsqrtss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [2:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_rsqrtss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [2:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_rsqrtss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:0.50] -; ZNVER1-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:0.50] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_rsqrtss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:0.50] -; ZNVER1-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:0.50] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <4 x float> undef, float %a0, i32 0 - %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %1) - %3 = load float, float *%a1, align 4 - %4 = insertelement <4 x float> undef, float %3, i32 0 - %5 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4) - %6 = fadd <4 x float> %2, %5 - ret <4 x float> %6 -} -declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone - -define void @test_sfence() { -; GENERIC-LABEL: test_sfence: -; GENERIC: # %bb.0: -; GENERIC-NEXT: sfence # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sfence: -; ATOM: # %bb.0: -; ATOM-NEXT: sfence # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sfence: -; SLM: # %bb.0: -; SLM-NEXT: sfence # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_sfence: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: sfence # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_sfence: -; SANDY: # %bb.0: -; SANDY-NEXT: sfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_sfence: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: sfence # sched: [2:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_sfence: -; HASWELL: # %bb.0: -; HASWELL-NEXT: sfence # sched: [2:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_sfence: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: sfence # sched: [2:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sfence: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: sfence # sched: [2:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_sfence: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: sfence # sched: [2:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sfence: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: sfence # sched: [2:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_sfence: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: sfence # sched: [2:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sfence: -; SKX: # %bb.0: -; SKX-NEXT: sfence # sched: [2:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_sfence: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: sfence # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_sfence: -; BDVER2: # %bb.0: -; BDVER2-NEXT: sfence # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_sfence: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: sfence # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_sfence: -; BTVER2: # %bb.0: -; BTVER2-NEXT: sfence # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_sfence: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: sfence # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_sfence: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: sfence # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.sse.sfence() - ret void -} -declare void @llvm.x86.sse.sfence() nounwind readnone - -define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) nounwind { -; GENERIC-LABEL: test_shufps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; GENERIC-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_shufps: -; ATOM: # %bb.0: -; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; ATOM-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [1:1.00] -; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_shufps: -; SLM: # %bb.0: -; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; SLM-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [4:1.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_shufps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; SANDY-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_shufps: -; SANDY: # %bb.0: -; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; SANDY-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_shufps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; HASWELL-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_shufps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; HASWELL-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_shufps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; BROADWELL-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_shufps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; BROADWELL-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_shufps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_shufps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; SKYLAKE-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_shufps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; SKX-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_shufps: -; SKX: # %bb.0: -; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; SKX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_shufps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [2:0.50] -; BDVER2-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:0.50] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_shufps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [2:0.50] -; BDVER2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:0.50] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_shufps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50] -; BTVER2-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_shufps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50] -; BTVER2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_shufps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50] -; ZNVER1-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_shufps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50] -; ZNVER1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [8:0.50] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} - -define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_sqrtps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:14.00] -; GENERIC-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:14.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sqrtps: -; ATOM: # %bb.0: -; ATOM-NEXT: sqrtps %xmm0, %xmm1 # sched: [70:35.00] -; ATOM-NEXT: sqrtps (%rdi), %xmm0 # sched: [70:35.00] -; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sqrtps: -; SLM: # %bb.0: -; SLM-NEXT: sqrtps (%rdi), %xmm1 # sched: [44:40.00] -; SLM-NEXT: sqrtps %xmm0, %xmm0 # sched: [41:40.00] -; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_sqrtps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:14.00] -; SANDY-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:14.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_sqrtps: -; SANDY: # %bb.0: -; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:14.00] -; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:14.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_sqrtps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [11:7.00] -; HASWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [17:7.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_sqrtps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [11:7.00] -; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [17:7.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_sqrtps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [11:7.00] -; BROADWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [16:7.00] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sqrtps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [11:7.00] -; BROADWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [16:7.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_sqrtps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [12:3.00] -; SKYLAKE-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:3.00] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sqrtps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:3.00] -; SKYLAKE-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:3.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_sqrtps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [12:3.00] -; SKX-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:3.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sqrtps: -; SKX: # %bb.0: -; SKX-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:3.00] -; SKX-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:3.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_sqrtps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [9:10.50] -; BDVER2-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [14:10.50] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_sqrtps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:10.50] -; BDVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [9:10.50] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_sqrtps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [21:21.00] -; BTVER2-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [26:21.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_sqrtps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [26:21.00] -; BTVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [21:21.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_sqrtps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [20:20.00] -; ZNVER1-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [27:20.00] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_sqrtps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsqrtps (%rdi), %xmm1 # sched: [27:20.00] -; ZNVER1-NEXT: vsqrtps %xmm0, %xmm0 # sched: [20:20.00] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) - %2 = load <4 x float>, <4 x float> *%a1, align 16 - %3 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %2) - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} -declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone - -; TODO - sqrtss_m - -define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_sqrtss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:14.00] -; GENERIC-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:14.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sqrtss: -; ATOM: # %bb.0: -; ATOM-NEXT: movaps (%rdi), %xmm1 # sched: [1:1.00] -; ATOM-NEXT: sqrtss %xmm0, %xmm0 # sched: [34:17.00] -; ATOM-NEXT: sqrtss %xmm1, %xmm1 # sched: [34:17.00] -; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sqrtss: -; SLM: # %bb.0: -; SLM-NEXT: movaps (%rdi), %xmm1 # sched: [3:1.00] -; SLM-NEXT: sqrtss %xmm0, %xmm0 # sched: [20:20.00] -; SLM-NEXT: sqrtss %xmm1, %xmm1 # sched: [20:20.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_sqrtss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:14.00] -; SANDY-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] -; SANDY-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:14.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_sqrtss: -; SANDY: # %bb.0: -; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00] -; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] -; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:14.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_sqrtss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [11:7.00] -; HASWELL-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] -; HASWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [11:7.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_sqrtss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [11:7.00] -; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] -; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [11:7.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_sqrtss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [11:4.00] -; BROADWELL-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [11:4.00] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sqrtss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [11:4.00] -; BROADWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:0.50] -; BROADWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [11:4.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_sqrtss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [12:3.00] -; SKYLAKE-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [12:3.00] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sqrtss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00] -; SKYLAKE-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] -; SKYLAKE-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:3.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_sqrtss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [12:3.00] -; SKX-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] -; SKX-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [12:3.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sqrtss: -; SKX: # %bb.0: -; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00] -; SKX-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] -; SKX-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:3.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_sqrtss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:0.50] -; BDVER2-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [9:10.50] -; BDVER2-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [9:10.50] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_sqrtss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:0.50] -; BDVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [9:10.50] -; BDVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [9:10.50] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_sqrtss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:1.00] -; BTVER2-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [21:21.00] -; BTVER2-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [21:21.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_sqrtss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:1.00] -; BTVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [21:21.00] -; BTVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [21:21.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_sqrtss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [20:20.00] -; ZNVER1-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [20:20.00] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_sqrtss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovaps (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [20:20.00] -; ZNVER1-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [20:20.00] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) - %2 = load <4 x float>, <4 x float> *%a1, align 16 - %3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %2) - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} -declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone - -define i32 @test_stmxcsr() { -; GENERIC-LABEL: test_stmxcsr: -; GENERIC: # %bb.0: -; GENERIC-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] -; GENERIC-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_stmxcsr: -; ATOM: # %bb.0: -; ATOM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [15:7.50] -; ATOM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_stmxcsr: -; SLM: # %bb.0: -; SLM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SLM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_stmxcsr: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] -; SANDY-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_stmxcsr: -; SANDY: # %bb.0: -; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] -; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_stmxcsr: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] -; HASWELL-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_stmxcsr: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] -; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_stmxcsr: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] -; BROADWELL-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_stmxcsr: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] -; BROADWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_stmxcsr: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_stmxcsr: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] -; SKYLAKE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_stmxcsr: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] -; SKX-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_stmxcsr: -; SKX: # %bb.0: -; SKX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] -; SKX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_stmxcsr: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BDVER2-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_stmxcsr: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BDVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_stmxcsr: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BTVER2-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_stmxcsr: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BTVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_stmxcsr: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25] -; ZNVER1-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_stmxcsr: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25] -; ZNVER1-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = alloca i32, align 4 - %2 = bitcast i32* %1 to i8* - call void @llvm.x86.sse.stmxcsr(i8* %2) - %3 = load i32, i32* %1, align 4 - ret i32 %3 -} -declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone - -define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_subps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_subps: -; ATOM: # %bb.0: -; ATOM-NEXT: subps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: subps (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_subps: -; SLM: # %bb.0: -; SLM-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: subps (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_subps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_subps: -; SANDY: # %bb.0: -; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_subps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_subps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_subps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: subps (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_subps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_subps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_subps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_subps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_subps: -; SKX: # %bb.0: -; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_subps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: subps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_subps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_subps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: subps (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_subps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_subps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_subps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fsub <4 x float> %a0, %a1 - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = fsub <4 x float> %1, %2 - ret <4 x float> %3 -} - -define float @test_subss(float %a0, float %a1, float *%a2) { -; GENERIC-LABEL: test_subss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_subss: -; ATOM: # %bb.0: -; ATOM-NEXT: subss %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: subss (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_subss: -; SLM: # %bb.0: -; SLM-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: subss (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_subss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_subss: -; SANDY: # %bb.0: -; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_subss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: subss (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_subss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_subss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: subss (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_subss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_subss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_subss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_subss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_subss: -; SKX: # %bb.0: -; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_subss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: subss %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: subss (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_subss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_subss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: subss (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_subss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_subss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: subss (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_subss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fsub float %a0, %a1 - %2 = load float, float *%a2, align 4 - %3 = fsub float %1, %2 - ret float %3 -} - -define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_ucomiss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: setnp %al # sched: [1:0.50] -; GENERIC-NEXT: sete %cl # sched: [1:0.50] -; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33] -; GENERIC-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: setnp %al # sched: [1:0.50] -; GENERIC-NEXT: sete %dl # sched: [1:0.50] -; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33] -; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33] -; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_ucomiss: -; ATOM: # %bb.0: -; ATOM-NEXT: ucomiss %xmm1, %xmm0 # sched: [9:4.50] -; ATOM-NEXT: setnp %al # sched: [1:0.50] -; ATOM-NEXT: sete %cl # sched: [1:0.50] -; ATOM-NEXT: andb %al, %cl # sched: [1:0.50] -; ATOM-NEXT: ucomiss (%rdi), %xmm0 # sched: [10:5.00] -; ATOM-NEXT: setnp %al # sched: [1:0.50] -; ATOM-NEXT: sete %dl # sched: [1:0.50] -; ATOM-NEXT: andb %al, %dl # sched: [1:0.50] -; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50] -; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_ucomiss: -; SLM: # %bb.0: -; SLM-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: setnp %al # sched: [1:0.50] -; SLM-NEXT: sete %cl # sched: [1:0.50] -; SLM-NEXT: andb %al, %cl # sched: [1:0.50] -; SLM-NEXT: ucomiss (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: setnp %al # sched: [1:0.50] -; SLM-NEXT: sete %dl # sched: [1:0.50] -; SLM-NEXT: andb %al, %dl # sched: [1:0.50] -; SLM-NEXT: orb %cl, %dl # sched: [1:0.50] -; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_ucomiss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] -; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50] -; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] -; SANDY-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] -; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50] -; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33] -; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] -; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_ucomiss: -; SANDY: # %bb.0: -; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.50] -; SANDY-NEXT: sete %cl # sched: [1:0.50] -; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] -; SANDY-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.50] -; SANDY-NEXT: sete %dl # sched: [1:0.50] -; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] -; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] -; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_ucomiss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50] -; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; HASWELL-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50] -; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_ucomiss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %cl # sched: [1:0.50] -; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; HASWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %dl # sched: [1:0.50] -; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] -; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] -; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_ucomiss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50] -; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50] -; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ucomiss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-NEXT: sete %cl # sched: [1:0.50] -; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-NEXT: sete %dl # sched: [1:0.50] -; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] -; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] -; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_ucomiss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_ucomiss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-NEXT: sete %cl # sched: [1:0.50] -; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKYLAKE-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-NEXT: sete %dl # sched: [1:0.50] -; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_ucomiss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKX-SSE-NEXT: sete %cl # sched: [1:0.50] -; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKX-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [7:1.00] -; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKX-SSE-NEXT: sete %dl # sched: [1:0.50] -; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_ucomiss: -; SKX: # %bb.0: -; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] -; SKX-NEXT: setnp %al # sched: [1:0.50] -; SKX-NEXT: sete %cl # sched: [1:0.50] -; SKX-NEXT: andb %al, %cl # sched: [1:0.25] -; SKX-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00] -; SKX-NEXT: setnp %al # sched: [1:0.50] -; SKX-NEXT: sete %dl # sched: [1:0.50] -; SKX-NEXT: andb %al, %dl # sched: [1:0.25] -; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_ucomiss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_ucomiss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50] -; BDVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %dl # sched: [1:0.50] -; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.50] -; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_ucomiss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] -; BTVER2-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_ucomiss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-NEXT: sete %cl # sched: [1:0.50] -; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50] -; BTVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-NEXT: sete %dl # sched: [1:0.50] -; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50] -; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50] -; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_ucomiss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_ucomiss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-NEXT: sete %cl # sched: [1:0.25] -; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25] -; ZNVER1-NEXT: vucomiss (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-NEXT: sete %dl # sched: [1:0.25] -; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25] -; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25] -; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) - %2 = load <4 x float>, <4 x float> *%a2, align 4 - %3 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %2) - %4 = or i32 %1, %3 - ret i32 %4 -} -declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone - -define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_unpckhps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; GENERIC-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_unpckhps: -; ATOM: # %bb.0: -; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; ATOM-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00] -; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_unpckhps: -; SLM: # %bb.0: -; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SLM-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [4:1.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_unpckhps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SANDY-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_unpckhps: -; SANDY: # %bb.0: -; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SANDY-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_unpckhps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; HASWELL-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_unpckhps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; HASWELL-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_unpckhps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; BROADWELL-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_unpckhps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_unpckhps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_unpckhps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_unpckhps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_unpckhps: -; SKX: # %bb.0: -; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_unpckhps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] -; BDVER2-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_unpckhps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] -; BDVER2-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_unpckhps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; BTVER2-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_unpckhps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; BTVER2-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_unpckhps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; ZNVER1-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_unpckhps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} - -define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_unpcklps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_unpcklps: -; ATOM: # %bb.0: -; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; ATOM-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00] -; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_unpcklps: -; SLM: # %bb.0: -; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SLM-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [4:1.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_unpcklps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SANDY-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_unpcklps: -; SANDY: # %bb.0: -; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SANDY-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_unpcklps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_unpcklps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_unpcklps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_unpcklps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_unpcklps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_unpcklps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_unpcklps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_unpcklps: -; SKX: # %bb.0: -; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_unpcklps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50] -; BDVER2-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_unpcklps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50] -; BDVER2-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_unpcklps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; BTVER2-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_unpcklps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; BTVER2-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_unpcklps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; ZNVER1-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_unpcklps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} - -define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_xorps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xorps: -; ATOM: # %bb.0: -; ATOM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: xorps (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xorps: -; SLM: # %bb.0: -; SLM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: xorps (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_xorps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_xorps: -; SANDY: # %bb.0: -; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_xorps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_xorps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_xorps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xorps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_xorps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xorps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_xorps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xorps: -; SKX: # %bb.0: -; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_xorps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_xorps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_xorps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_xorps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_xorps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_xorps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <4 x float> %a0 to <4 x i32> - %2 = bitcast <4 x float> %a1 to <4 x i32> - %3 = xor <4 x i32> %1, %2 - %4 = load <4 x float>, <4 x float> *%a2, align 16 - %5 = bitcast <4 x float> %4 to <4 x i32> - %6 = xor <4 x i32> %3, %5 - %7 = bitcast <4 x i32> %6 to <4 x float> - ret <4 x float> %7 -} - -; 'WriteZero' and 'WriteNop' class instructions. - -define <4 x float> @test_fnop() nounwind { -; GENERIC-LABEL: test_fnop: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: nop # sched: [1:0.25] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.25] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_fnop: -; ATOM: # %bb.0: -; ATOM-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: #APP -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_fnop: -; SLM: # %bb.0: -; SLM-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50] -; SLM-NEXT: #APP -; SLM-NEXT: nop # sched: [1:0.50] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_fnop: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: #APP -; SANDY-SSE-NEXT: nop # sched: [1:0.25] -; SANDY-SSE-NEXT: #NO_APP -; SANDY-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.25] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_fnop: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: nop # sched: [1:0.25] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [0:0.25] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_fnop: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: #APP -; HASWELL-SSE-NEXT: nop # sched: [1:0.25] -; HASWELL-SSE-NEXT: #NO_APP -; HASWELL-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_fnop: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: nop # sched: [1:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_fnop: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: #APP -; BROADWELL-SSE-NEXT: nop # sched: [1:0.25] -; BROADWELL-SSE-NEXT: #NO_APP -; BROADWELL-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fnop: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: nop # sched: [1:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_fnop: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: #APP -; SKYLAKE-SSE-NEXT: nop # sched: [1:0.17] -; SKYLAKE-SSE-NEXT: #NO_APP -; SKYLAKE-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_fnop: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: nop # sched: [1:0.17] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_fnop: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: #APP -; SKX-SSE-NEXT: nop # sched: [1:0.17] -; SKX-SSE-NEXT: #NO_APP -; SKX-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_fnop: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: nop # sched: [1:0.17] -; SKX-NEXT: #NO_APP -; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_fnop: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.25] -; BDVER2-SSE-NEXT: #APP -; BDVER2-SSE-NEXT: nop # sched: [1:0.50] -; BDVER2-SSE-NEXT: #NO_APP -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_fnop: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [0:0.25] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: nop # sched: [1:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_fnop: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.50] -; BTVER2-SSE-NEXT: #APP -; BTVER2-SSE-NEXT: nop # sched: [1:0.50] -; BTVER2-SSE-NEXT: #NO_APP -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_fnop: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [0:0.50] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: nop # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_fnop: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: #APP -; ZNVER1-SSE-NEXT: nop # sched: [1:0.25] -; ZNVER1-SSE-NEXT: #NO_APP -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_fnop: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: nop # sched: [1:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void asm sideeffect "nop", ""() nounwind - ret <4 x float> zeroinitializer -} - -!0 = !{i32 1} Index: test/CodeGen/X86/sse2-schedule.ll =================================================================== --- test/CodeGen/X86/sse2-schedule.ll +++ test/CodeGen/X86/sse2-schedule.ll @@ -1,16972 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2,-xop | FileCheck %s --check-prefixes=CHECK,BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 - -define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_addpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_addpd: -; ATOM: # %bb.0: -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: addpd (%rdi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_addpd: -; SLM: # %bb.0: -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: addpd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_addpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_addpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_addpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_addpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_addpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_addpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_addpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_addpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_addpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_addpd: -; SKX: # %bb.0: -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_addpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_addpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_addpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_addpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_addpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_addpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fadd <2 x double> %a0, %a1 - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = fadd <2 x double> %1, %2 - ret <2 x double> %3 -} - -define double @test_addsd(double %a0, double %a1, double *%a2) { -; GENERIC-LABEL: test_addsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_addsd: -; ATOM: # %bb.0: -; ATOM-NEXT: addsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: addsd (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_addsd: -; SLM: # %bb.0: -; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: addsd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_addsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_addsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_addsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_addsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_addsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_addsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_addsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_addsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_addsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_addsd: -; SKX: # %bb.0: -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_addsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_addsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_addsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_addsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_addsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_addsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fadd double %a0, %a1 - %2 = load double, double *%a2, align 8 - %3 = fadd double %1, %2 - ret double %3 -} - -define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_andpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_andpd: -; ATOM: # %bb.0: -; ATOM-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: andpd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_andpd: -; SLM: # %bb.0: -; SLM-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: andpd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_andpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_andpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_andpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_andpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_andpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_andpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_andpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_andpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_andpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_andpd: -; SKX: # %bb.0: -; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_andpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_andpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_andpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_andpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_andpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_andpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <2 x double> %a0 to <4 x i32> - %2 = bitcast <2 x double> %a1 to <4 x i32> - %3 = and <4 x i32> %1, %2 - %4 = load <2 x double>, <2 x double> *%a2, align 16 - %5 = bitcast <2 x double> %4 to <4 x i32> - %6 = and <4 x i32> %3, %5 - %7 = bitcast <4 x i32> %6 to <2 x double> - %8 = fadd <2 x double> %a1, %7 - ret <2 x double> %8 -} - -define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_andnotpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_andnotpd: -; ATOM: # %bb.0: -; ATOM-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: andnpd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_andnotpd: -; SLM: # %bb.0: -; SLM-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: andnpd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_andnotpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_andnotpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_andnotpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_andnotpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_andnotpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_andnotpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_andnotpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_andnotpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_andnotpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_andnotpd: -; SKX: # %bb.0: -; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_andnotpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_andnotpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_andnotpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_andnotpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_andnotpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_andnotpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <2 x double> %a0 to <4 x i32> - %2 = bitcast <2 x double> %a1 to <4 x i32> - %3 = xor <4 x i32> %1, - %4 = and <4 x i32> %3, %2 - %5 = load <2 x double>, <2 x double> *%a2, align 16 - %6 = bitcast <2 x double> %5 to <4 x i32> - %7 = xor <4 x i32> %4, - %8 = and <4 x i32> %6, %7 - %9 = bitcast <4 x i32> %8 to <2 x double> - %10 = fadd <2 x double> %a1, %9 - ret <2 x double> %10 -} - -define void @test_clflush(i8* %p){ -; GENERIC-LABEL: test_clflush: -; GENERIC: # %bb.0: -; GENERIC-NEXT: clflush (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_clflush: -; ATOM: # %bb.0: -; ATOM-NEXT: clflush (%rdi) # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_clflush: -; SLM: # %bb.0: -; SLM-NEXT: clflush (%rdi) # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_clflush: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: clflush (%rdi) # sched: [5:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_clflush: -; SANDY: # %bb.0: -; SANDY-NEXT: clflush (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_clflush: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_clflush: -; HASWELL: # %bb.0: -; HASWELL-NEXT: clflush (%rdi) # sched: [2:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_clflush: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_clflush: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: clflush (%rdi) # sched: [2:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_clflush: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_clflush: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: clflush (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_clflush: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_clflush: -; SKX: # %bb.0: -; SKX-NEXT: clflush (%rdi) # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_clflush: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: clflush (%rdi) # sched: [5:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_clflush: -; BDVER2: # %bb.0: -; BDVER2-NEXT: clflush (%rdi) # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_clflush: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: clflush (%rdi) # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_clflush: -; BTVER2: # %bb.0: -; BTVER2-NEXT: clflush (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_clflush: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: clflush (%rdi) # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_clflush: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: clflush (%rdi) # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void @llvm.x86.sse2.clflush(i8* %p) - ret void -} -declare void @llvm.x86.sse2.clflush(i8*) nounwind - -define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_cmppd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] -; GENERIC-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmppd: -; ATOM: # %bb.0: -; ATOM-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [6:3.00] -; ATOM-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmppd: -; SLM: # %bb.0: -; SLM-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cmppd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cmppd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cmppd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cmppd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cmppd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmppd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cmppd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmppd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50] -; SKYLAKE-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cmppd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50] -; SKX-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmppd: -; SKX: # %bb.0: -; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50] -; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cmppd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [2:1.00] -; BDVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cmppd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00] -; BDVER2-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cmppd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cmppd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cmppd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cmppd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; ZNVER1-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fcmp oeq <2 x double> %a0, %a1 - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = fcmp oeq <2 x double> %a0, %2 - %4 = or <2 x i1> %1, %3 - %5 = sext <2 x i1> %4 to <2 x i64> - %6 = bitcast <2 x i64> %5 to <2 x double> - ret <2 x double> %6 -} - -define double @test_cmpsd(double %a0, double %a1, double *%a2) { -; GENERIC-LABEL: test_cmpsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmpsd: -; ATOM: # %bb.0: -; ATOM-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmpsd: -; SLM: # %bb.0: -; SLM-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cmpsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cmpsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cmpsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cmpsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cmpsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmpsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cmpsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmpsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cmpsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmpsd: -; SKX: # %bb.0: -; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cmpsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cmpsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cmpsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cmpsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cmpsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cmpsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <2 x double> undef, double %a0, i32 0 - %2 = insertelement <2 x double> undef, double %a1, i32 0 - %3 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %1, <2 x double> %2, i8 0) - %4 = load double, double *%a2, align 8 - %5 = insertelement <2 x double> undef, double %4, i32 0 - %6 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %3, <2 x double> %5, i8 0) - %7 = extractelement <2 x double> %6, i32 0 - ret double %7 -} -declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone - -define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_comisd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: setnp %al # sched: [1:0.50] -; GENERIC-NEXT: sete %cl # sched: [1:0.50] -; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33] -; GENERIC-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: setnp %al # sched: [1:0.50] -; GENERIC-NEXT: sete %dl # sched: [1:0.50] -; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33] -; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33] -; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_comisd: -; ATOM: # %bb.0: -; ATOM-NEXT: comisd %xmm1, %xmm0 # sched: [9:4.50] -; ATOM-NEXT: setnp %al # sched: [1:0.50] -; ATOM-NEXT: sete %cl # sched: [1:0.50] -; ATOM-NEXT: andb %al, %cl # sched: [1:0.50] -; ATOM-NEXT: comisd (%rdi), %xmm0 # sched: [10:5.00] -; ATOM-NEXT: setnp %al # sched: [1:0.50] -; ATOM-NEXT: sete %dl # sched: [1:0.50] -; ATOM-NEXT: andb %al, %dl # sched: [1:0.50] -; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50] -; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_comisd: -; SLM: # %bb.0: -; SLM-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: setnp %al # sched: [1:0.50] -; SLM-NEXT: sete %cl # sched: [1:0.50] -; SLM-NEXT: andb %al, %cl # sched: [1:0.50] -; SLM-NEXT: comisd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: setnp %al # sched: [1:0.50] -; SLM-NEXT: sete %dl # sched: [1:0.50] -; SLM-NEXT: andb %al, %dl # sched: [1:0.50] -; SLM-NEXT: orb %cl, %dl # sched: [1:0.50] -; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_comisd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] -; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50] -; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] -; SANDY-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] -; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50] -; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33] -; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] -; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_comisd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.50] -; SANDY-NEXT: sete %cl # sched: [1:0.50] -; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] -; SANDY-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.50] -; SANDY-NEXT: sete %dl # sched: [1:0.50] -; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] -; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] -; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_comisd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50] -; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; HASWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50] -; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_comisd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %cl # sched: [1:0.50] -; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; HASWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %dl # sched: [1:0.50] -; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] -; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] -; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_comisd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50] -; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50] -; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_comisd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-NEXT: sete %cl # sched: [1:0.50] -; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-NEXT: sete %dl # sched: [1:0.50] -; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] -; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] -; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_comisd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_comisd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-NEXT: sete %cl # sched: [1:0.50] -; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKYLAKE-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-NEXT: sete %dl # sched: [1:0.50] -; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_comisd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKX-SSE-NEXT: sete %cl # sched: [1:0.50] -; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKX-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00] -; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKX-SSE-NEXT: sete %dl # sched: [1:0.50] -; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_comisd: -; SKX: # %bb.0: -; SKX-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00] -; SKX-NEXT: setnp %al # sched: [1:0.50] -; SKX-NEXT: sete %cl # sched: [1:0.50] -; SKX-NEXT: andb %al, %cl # sched: [1:0.25] -; SKX-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00] -; SKX-NEXT: setnp %al # sched: [1:0.50] -; SKX-NEXT: sete %dl # sched: [1:0.50] -; SKX-NEXT: andb %al, %dl # sched: [1:0.25] -; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_comisd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_comisd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50] -; BDVER2-NEXT: vcomisd (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %dl # sched: [1:0.50] -; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.50] -; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_comisd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] -; BTVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_comisd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-NEXT: sete %cl # sched: [1:0.50] -; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50] -; BTVER2-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-NEXT: sete %dl # sched: [1:0.50] -; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50] -; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50] -; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_comisd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_comisd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-NEXT: sete %cl # sched: [1:0.25] -; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25] -; ZNVER1-NEXT: vcomisd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-NEXT: sete %dl # sched: [1:0.25] -; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25] -; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25] -; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 8 - %3 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %2) - %4 = or i32 %1, %3 - ret i32 %4 -} -declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone - -define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { -; GENERIC-LABEL: test_cvtdq2pd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00] -; GENERIC-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtdq2pd: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtdq2pd (%rdi), %xmm1 # sched: [8:4.00] -; ATOM-NEXT: cvtdq2pd %xmm0, %xmm0 # sched: [7:3.50] -; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00] -; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtdq2pd: -; SLM: # %bb.0: -; SLM-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtdq2pd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00] -; SANDY-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtdq2pd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtdq2pd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtdq2pd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtdq2pd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm1 # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtdq2pd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [9:1.00] -; BROADWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtdq2pd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtdq2pd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtdq2pd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50] -; SKX-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtdq2pd: -; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtdq2pd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [8:1.00] -; BDVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [13:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtdq2pd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [13:1.00] -; BDVER2-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtdq2pd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtdq2pd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtdq2pd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtdq2pd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> - %2 = sitofp <2 x i32> %1 to <2 x double> - %3 = load <4 x i32>, <4 x i32>*%a1, align 16 - %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <2 x i32> - %5 = sitofp <2 x i32> %4 to <2 x double> - %6 = fadd <2 x double> %2, %5 - ret <2 x double> %6 -} - -define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { -; GENERIC-LABEL: test_cvtdq2ps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] -; GENERIC-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtdq2ps: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtdq2ps (%rdi), %xmm1 # sched: [7:3.50] -; ATOM-NEXT: cvtdq2ps %xmm0, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] -; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtdq2ps: -; SLM: # %bb.0: -; SLM-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtdq2ps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtdq2ps: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtdq2ps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtdq2ps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtdq2ps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtdq2ps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtdq2ps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtdq2ps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtdq2ps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50] -; SKX-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtdq2ps: -; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtdq2ps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtdq2ps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtdq2ps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtdq2ps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtdq2ps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtdq2ps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sitofp <4 x i32> %a0 to <4 x float> - %2 = load <4 x i32>, <4 x i32>*%a1, align 16 - %3 = sitofp <4 x i32> %2 to <4 x float> - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} - -define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_cvtpd2dq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00] -; GENERIC-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtpd2dq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtpd2dq (%rdi), %xmm1 # sched: [8:4.00] -; ATOM-NEXT: cvtpd2dq %xmm0, %xmm0 # sched: [7:3.50] -; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtpd2dq: -; SLM: # %bb.0: -; SLM-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtpd2dq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00] -; SANDY-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtpd2dq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [10:1.00] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtpd2dq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtpd2dq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtpd2dq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm1 # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtpd2dq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtpd2dq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtpd2dq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtpd2dq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtpd2dq: -; SKX: # %bb.0: -; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtpd2dq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [8:1.00] -; BDVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [13:1.00] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtpd2dq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [13:1.00] -; BDVER2-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtpd2dq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtpd2dq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtpd2dq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtpd2dq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) - %2 = load <2 x double>, <2 x double> *%a1, align 16 - %3 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %2) - %4 = add <4 x i32> %1, %3 - ret <4 x i32> %4 -} -declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone - -define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_cvtpd2ps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] -; GENERIC-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtpd2ps: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtpd2ps (%rdi), %xmm1 # sched: [8:4.00] -; ATOM-NEXT: cvtpd2ps %xmm0, %xmm0 # sched: [7:3.50] -; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] -; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtpd2ps: -; SLM: # %bb.0: -; SLM-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtpd2ps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] -; SANDY-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtpd2ps: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [10:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtpd2ps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtpd2ps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtpd2ps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm1 # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtpd2ps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtpd2ps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtpd2ps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtpd2ps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtpd2ps: -; SKX: # %bb.0: -; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtpd2ps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [8:1.00] -; BDVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [13:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtpd2ps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [13:1.00] -; BDVER2-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtpd2ps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtpd2ps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtpd2ps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtpd2ps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [11:1.00] -; ZNVER1-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) - %2 = load <2 x double>, <2 x double> *%a1, align 16 - %3 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %2) - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} -declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone - -define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_cvtps2dq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] -; GENERIC-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtps2dq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtps2dq (%rdi), %xmm1 # sched: [7:3.50] -; ATOM-NEXT: cvtps2dq %xmm0, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtps2dq: -; SLM: # %bb.0: -; SLM-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtps2dq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtps2dq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtps2dq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtps2dq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtps2dq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtps2dq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtps2dq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtps2dq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtps2dq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50] -; SKX-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtps2dq: -; SKX: # %bb.0: -; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtps2dq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtps2dq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtps2dq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtps2dq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtps2dq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtps2dq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) - %2 = load <4 x float>, <4 x float> *%a1, align 16 - %3 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %2) - %4 = add <4 x i32> %1, %3 - ret <4 x i32> %4 -} -declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone - -define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_cvtps2pd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] -; GENERIC-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtps2pd: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtps2pd (%rdi), %xmm1 # sched: [8:4.00] -; ATOM-NEXT: cvtps2pd %xmm0, %xmm0 # sched: [7:3.50] -; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00] -; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtps2pd: -; SLM: # %bb.0: -; SLM-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtps2pd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] -; SANDY-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtps2pd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtps2pd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] -; HASWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtps2pd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [6:1.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtps2pd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtps2pd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtps2pd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtps2pd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtps2pd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtps2pd: -; SKX: # %bb.0: -; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtps2pd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [8:1.00] -; BDVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [13:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtps2pd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [13:1.00] -; BDVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtps2pd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtps2pd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00] -; BTVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtps2pd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtps2pd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [10:1.00] -; ZNVER1-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> - %2 = fpext <2 x float> %1 to <2 x double> - %3 = load <4 x float>, <4 x float> *%a1, align 16 - %4 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> - %5 = fpext <2 x float> %4 to <2 x double> - %6 = fadd <2 x double> %2, %5 - ret <2 x double> %6 -} - -define i32 @test_cvtsd2si(double %a0, double *%a1) { -; GENERIC-LABEL: test_cvtsd2si: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00] -; GENERIC-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtsd2si: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtsd2si (%rdi), %eax # sched: [9:4.50] -; ATOM-NEXT: cvtsd2si %xmm0, %ecx # sched: [8:4.00] -; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtsd2si: -; SLM: # %bb.0: -; SLM-NEXT: cvtsd2si (%rdi), %eax # sched: [7:1.00] -; SLM-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:0.50] -; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtsd2si: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] -; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtsd2si: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-NEXT: vcvtsd2si (%rdi), %eax # sched: [10:1.00] -; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtsd2si: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] -; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtsd2si: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtsd2si: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtsd2si: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00] -; BROADWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtsd2si: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtsd2si: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvtsd2si (%rdi), %eax # sched: [11:1.00] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtsd2si: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00] -; SKX-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00] -; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtsd2si: -; SKX: # %bb.0: -; SKX-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00] -; SKX-NEXT: vcvtsd2si (%rdi), %eax # sched: [11:1.00] -; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtsd2si: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [18:1.00] -; BDVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [13:1.00] -; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtsd2si: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [18:1.00] -; BDVER2-NEXT: vcvtsd2si %xmm0, %ecx # sched: [13:1.00] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtsd2si: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00] -; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [7:1.00] -; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtsd2si: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [12:1.00] -; BTVER2-NEXT: vcvtsd2si %xmm0, %ecx # sched: [7:1.00] -; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtsd2si: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtsd2si: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtsd2si (%rdi), %eax # sched: [12:1.00] -; ZNVER1-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <2 x double> undef, double %a0, i32 0 - %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %1) - %3 = load double, double *%a1, align 8 - %4 = insertelement <2 x double> undef, double %3, i32 0 - %5 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %4) - %6 = add i32 %2, %5 - ret i32 %6 -} -declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone - -define i64 @test_cvtsd2siq(double %a0, double *%a1) { -; GENERIC-LABEL: test_cvtsd2siq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00] -; GENERIC-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtsd2siq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtsd2si (%rdi), %rax # sched: [9:4.50] -; ATOM-NEXT: cvtsd2si %xmm0, %rcx # sched: [8:4.00] -; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtsd2siq: -; SLM: # %bb.0: -; SLM-NEXT: cvtsd2si (%rdi), %rax # sched: [7:1.00] -; SLM-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:0.50] -; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtsd2siq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] -; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtsd2siq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-NEXT: vcvtsd2si (%rdi), %rax # sched: [10:1.00] -; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtsd2siq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] -; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtsd2siq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtsd2siq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtsd2siq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00] -; BROADWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtsd2siq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtsd2siq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvtsd2si (%rdi), %rax # sched: [11:1.00] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtsd2siq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00] -; SKX-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00] -; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtsd2siq: -; SKX: # %bb.0: -; SKX-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00] -; SKX-NEXT: vcvtsd2si (%rdi), %rax # sched: [11:1.00] -; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtsd2siq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [18:1.00] -; BDVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [13:1.00] -; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtsd2siq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [18:1.00] -; BDVER2-NEXT: vcvtsd2si %xmm0, %rcx # sched: [13:1.00] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtsd2siq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00] -; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [7:1.00] -; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtsd2siq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [12:1.00] -; BTVER2-NEXT: vcvtsd2si %xmm0, %rcx # sched: [7:1.00] -; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtsd2siq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtsd2siq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtsd2si (%rdi), %rax # sched: [12:1.00] -; ZNVER1-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <2 x double> undef, double %a0, i32 0 - %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %1) - %3 = load double, double *%a1, align 8 - %4 = insertelement <2 x double> undef, double %3, i32 0 - %5 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %4) - %6 = add i64 %2, %5 - ret i64 %6 -} -declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone - -define float @test_cvtsd2ss(double %a0, double *%a1) { -; GENERIC-LABEL: test_cvtsd2ss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] -; GENERIC-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] -; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtsd2ss: -; ATOM: # %bb.0: -; ATOM-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00] -; ATOM-NEXT: cvtsd2ss %xmm0, %xmm2 # sched: [6:3.00] -; ATOM-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: cvtsd2ss %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: addss %xmm2, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtsd2ss: -; SLM: # %bb.0: -; SLM-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [3:1.00] -; SLM-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:0.50] -; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtsd2ss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] -; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] -; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtsd2ss: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] -; SANDY-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtsd2ss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] -; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtsd2ss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; HASWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtsd2ss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtsd2ss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; BROADWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtsd2ss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtsd2ss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; SKYLAKE-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtsd2ss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00] -; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtsd2ss: -; SKX: # %bb.0: -; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; SKX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtsd2ss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; BDVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtsd2ss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; BDVER2-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtsd2ss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [7:2.00] -; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] -; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [7:2.00] -; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtsd2ss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [7:2.00] -; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] -; BTVER2-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [7:2.00] -; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtsd2ss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtsd2ss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] -; ZNVER1-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fptrunc double %a0 to float - %2 = load double, double *%a1, align 8 - %3 = fptrunc double %2 to float - %4 = fadd float %1, %3 - ret float %4 -} - -define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { -; GENERIC-LABEL: test_cvtsi2sd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] -; GENERIC-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtsi2sd: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [6:3.00] -; ATOM-NEXT: addsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtsi2sd: -; SLM: # %bb.0: -; SLM-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:0.50] -; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtsi2sd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] -; SANDY-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtsi2sd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtsi2sd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtsi2sd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtsi2sd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtsi2sd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtsi2sd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtsi2sd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtsi2sd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtsi2sd: -; SKX: # %bb.0: -; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtsi2sd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtsi2sd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtsi2sd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [14:1.00] -; BTVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [9:1.00] -; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtsi2sd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [9:1.00] -; BTVER2-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [14:1.00] -; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtsi2sd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtsi2sd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sitofp i32 %a0 to double - %2 = load i32, i32 *%a1, align 8 - %3 = sitofp i32 %2 to double - %4 = fadd double %1, %3 - ret double %4 -} - -define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { -; GENERIC-LABEL: test_cvtsi2sdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] -; GENERIC-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtsi2sdq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [6:3.00] -; ATOM-NEXT: addsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtsi2sdq: -; SLM: # %bb.0: -; SLM-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:0.50] -; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtsi2sdq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] -; SANDY-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtsi2sdq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtsi2sdq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtsi2sdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtsi2sdq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtsi2sdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtsi2sdq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtsi2sdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtsi2sdq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtsi2sdq: -; SKX: # %bb.0: -; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtsi2sdq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [13:1.00] -; BDVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtsi2sdq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtsi2sdq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [14:1.00] -; BTVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [9:1.00] -; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtsi2sdq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [9:1.00] -; BTVER2-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [14:1.00] -; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtsi2sdq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtsi2sdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sitofp i64 %a0 to double - %2 = load i64, i64 *%a1, align 8 - %3 = sitofp i64 %2 to double - %4 = fadd double %1, %3 - ret double %4 -} - -; TODO - cvtss2sd_m - -define double @test_cvtss2sd(float %a0, float *%a1) { -; GENERIC-LABEL: test_cvtss2sd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; GENERIC-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtss2sd: -; ATOM: # %bb.0: -; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00] -; ATOM-NEXT: cvtss2sd %xmm0, %xmm2 # sched: [6:3.00] -; ATOM-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: cvtss2sd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: addsd %xmm2, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtss2sd: -; SLM: # %bb.0: -; SLM-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00] -; SLM-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:0.50] -; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtss2sd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00] -; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtss2sd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtss2sd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00] -; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtss2sd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00] -; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtss2sd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtss2sd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; BROADWELL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00] -; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtss2sd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtss2sd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKYLAKE-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtss2sd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00] -; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtss2sd: -; SKX: # %bb.0: -; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtss2sd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtss2sd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtss2sd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [7:2.00] -; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [7:2.00] -; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtss2sd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [7:2.00] -; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [7:2.00] -; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtss2sd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtss2sd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fpext float %a0 to double - %2 = load float, float *%a1, align 4 - %3 = fpext float %2 to double - %4 = fadd double %1, %3 - ret double %4 -} - -define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_cvttpd2dq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00] -; GENERIC-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvttpd2dq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvttpd2dq (%rdi), %xmm1 # sched: [8:4.00] -; ATOM-NEXT: cvttpd2dq %xmm0, %xmm0 # sched: [7:3.50] -; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvttpd2dq: -; SLM: # %bb.0: -; SLM-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvttpd2dq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00] -; SANDY-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvttpd2dq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [10:1.00] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvttpd2dq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvttpd2dq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvttpd2dq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm1 # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvttpd2dq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvttpd2dq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvttpd2dq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvttpd2dq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvttpd2dq: -; SKX: # %bb.0: -; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvttpd2dq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [8:1.00] -; BDVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [13:1.00] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvttpd2dq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [13:1.00] -; BDVER2-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvttpd2dq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvttpd2dq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvttpd2dq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvttpd2dq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fptosi <2 x double> %a0 to <2 x i32> - %2 = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> - %3 = load <2 x double>, <2 x double> *%a1, align 16 - %4 = fptosi <2 x double> %3 to <2 x i32> - %5 = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32> - %6 = add <4 x i32> %2, %5 - ret <4 x i32> %6 -} - -define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_cvttps2dq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] -; GENERIC-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvttps2dq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvttps2dq (%rdi), %xmm1 # sched: [7:3.50] -; ATOM-NEXT: cvttps2dq %xmm0, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvttps2dq: -; SLM: # %bb.0: -; SLM-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvttps2dq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvttps2dq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvttps2dq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvttps2dq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvttps2dq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvttps2dq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvttps2dq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvttps2dq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvttps2dq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50] -; SKX-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvttps2dq: -; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvttps2dq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvttps2dq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvttps2dq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvttps2dq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvttps2dq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvttps2dq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fptosi <4 x float> %a0 to <4 x i32> - %2 = load <4 x float>, <4 x float> *%a1, align 16 - %3 = fptosi <4 x float> %2 to <4 x i32> - %4 = add <4 x i32> %1, %3 - ret <4 x i32> %4 -} - -define i32 @test_cvttsd2si(double %a0, double *%a1) { -; GENERIC-LABEL: test_cvttsd2si: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00] -; GENERIC-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvttsd2si: -; ATOM: # %bb.0: -; ATOM-NEXT: cvttsd2si (%rdi), %eax # sched: [9:4.50] -; ATOM-NEXT: cvttsd2si %xmm0, %ecx # sched: [8:4.00] -; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvttsd2si: -; SLM: # %bb.0: -; SLM-NEXT: cvttsd2si (%rdi), %eax # sched: [7:1.00] -; SLM-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:0.50] -; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvttsd2si: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] -; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvttsd2si: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-NEXT: vcvttsd2si (%rdi), %eax # sched: [10:1.00] -; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvttsd2si: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] -; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvttsd2si: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvttsd2si: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvttsd2si: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00] -; BROADWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvttsd2si: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvttsd2si: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvttsd2si (%rdi), %eax # sched: [11:1.00] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvttsd2si: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00] -; SKX-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00] -; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvttsd2si: -; SKX: # %bb.0: -; SKX-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00] -; SKX-NEXT: vcvttsd2si (%rdi), %eax # sched: [11:1.00] -; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvttsd2si: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [18:1.00] -; BDVER2-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [13:1.00] -; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvttsd2si: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [18:1.00] -; BDVER2-NEXT: vcvttsd2si %xmm0, %ecx # sched: [13:1.00] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvttsd2si: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [12:1.00] -; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [7:1.00] -; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvttsd2si: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [12:1.00] -; BTVER2-NEXT: vcvttsd2si %xmm0, %ecx # sched: [7:1.00] -; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvttsd2si: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvttsd2si: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvttsd2si (%rdi), %eax # sched: [12:1.00] -; ZNVER1-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fptosi double %a0 to i32 - %2 = load double, double *%a1, align 8 - %3 = fptosi double %2 to i32 - %4 = add i32 %1, %3 - ret i32 %4 -} - -define i64 @test_cvttsd2siq(double %a0, double *%a1) { -; GENERIC-LABEL: test_cvttsd2siq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00] -; GENERIC-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvttsd2siq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvttsd2si (%rdi), %rax # sched: [9:4.50] -; ATOM-NEXT: cvttsd2si %xmm0, %rcx # sched: [8:4.00] -; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvttsd2siq: -; SLM: # %bb.0: -; SLM-NEXT: cvttsd2si (%rdi), %rax # sched: [7:1.00] -; SLM-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:0.50] -; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvttsd2siq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] -; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvttsd2siq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-NEXT: vcvttsd2si (%rdi), %rax # sched: [10:1.00] -; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvttsd2siq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] -; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvttsd2siq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [9:1.00] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvttsd2siq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvttsd2siq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [9:1.00] -; BROADWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvttsd2siq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvttsd2siq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvttsd2si (%rdi), %rax # sched: [11:1.00] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvttsd2siq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00] -; SKX-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00] -; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvttsd2siq: -; SKX: # %bb.0: -; SKX-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00] -; SKX-NEXT: vcvttsd2si (%rdi), %rax # sched: [11:1.00] -; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvttsd2siq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [18:1.00] -; BDVER2-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [13:1.00] -; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvttsd2siq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [18:1.00] -; BDVER2-NEXT: vcvttsd2si %xmm0, %rcx # sched: [13:1.00] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvttsd2siq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [12:1.00] -; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [7:1.00] -; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvttsd2siq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [12:1.00] -; BTVER2-NEXT: vcvttsd2si %xmm0, %rcx # sched: [7:1.00] -; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvttsd2siq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvttsd2siq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvttsd2si (%rdi), %rax # sched: [12:1.00] -; ZNVER1-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fptosi double %a0 to i64 - %2 = load double, double *%a1, align 8 - %3 = fptosi double %2 to i64 - %4 = add i64 %1, %3 - ret i64 %4 -} - -define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_divpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: divpd %xmm1, %xmm0 # sched: [22:22.00] -; GENERIC-NEXT: divpd (%rdi), %xmm0 # sched: [28:22.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_divpd: -; ATOM: # %bb.0: -; ATOM-NEXT: divpd %xmm1, %xmm0 # sched: [125:62.50] -; ATOM-NEXT: divpd (%rdi), %xmm0 # sched: [125:62.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_divpd: -; SLM: # %bb.0: -; SLM-NEXT: divpd %xmm1, %xmm0 # sched: [69:69.00] -; SLM-NEXT: divpd (%rdi), %xmm0 # sched: [72:69.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_divpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [22:22.00] -; SANDY-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [28:22.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_divpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:22.00] -; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:22.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_divpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [20:14.00] -; HASWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [26:14.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_divpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [20:14.00] -; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [26:14.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_divpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:8.00] -; BROADWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [19:8.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_divpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:8.00] -; BROADWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [19:8.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_divpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:3.00] -; SKYLAKE-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:4.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_divpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:3.00] -; SKYLAKE-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:4.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_divpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:3.00] -; SKX-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:4.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_divpd: -; SKX: # %bb.0: -; SKX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:3.00] -; SKX-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:4.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_divpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [9:9.50] -; BDVER2-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [14:9.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_divpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [9:9.50] -; BDVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [14:9.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_divpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [19:19.00] -; BTVER2-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [24:19.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_divpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:19.00] -; BTVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [24:19.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_divpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [15:1.00] -; ZNVER1-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [22:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_divpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [15:1.00] -; ZNVER1-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [22:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fdiv <2 x double> %a0, %a1 - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = fdiv <2 x double> %1, %2 - ret <2 x double> %3 -} - -define double @test_divsd(double %a0, double %a1, double *%a2) { -; GENERIC-LABEL: test_divsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: divsd %xmm1, %xmm0 # sched: [22:22.00] -; GENERIC-NEXT: divsd (%rdi), %xmm0 # sched: [28:22.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_divsd: -; ATOM: # %bb.0: -; ATOM-NEXT: divsd %xmm1, %xmm0 # sched: [62:31.00] -; ATOM-NEXT: divsd (%rdi), %xmm0 # sched: [62:31.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_divsd: -; SLM: # %bb.0: -; SLM-NEXT: divsd %xmm1, %xmm0 # sched: [34:32.00] -; SLM-NEXT: divsd (%rdi), %xmm0 # sched: [37:32.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_divsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [22:22.00] -; SANDY-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [28:22.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_divsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:22.00] -; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:22.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_divsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [20:14.00] -; HASWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [25:14.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_divsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [20:14.00] -; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [25:14.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_divsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:4.00] -; BROADWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:8.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_divsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:4.00] -; BROADWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:8.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_divsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:3.00] -; SKYLAKE-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:4.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_divsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:3.00] -; SKYLAKE-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_divsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:3.00] -; SKX-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:4.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_divsd: -; SKX: # %bb.0: -; SKX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:3.00] -; SKX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_divsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [9:9.50] -; BDVER2-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [14:9.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_divsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [9:9.50] -; BDVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [14:9.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_divsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [19:19.00] -; BTVER2-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [24:19.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_divsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:19.00] -; BTVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [24:19.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_divsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [15:1.00] -; ZNVER1-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [22:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_divsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [15:1.00] -; ZNVER1-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [22:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fdiv double %a0, %a1 - %2 = load double, double *%a2, align 8 - %3 = fdiv double %1, %2 - ret double %3 -} - -define void @test_lfence() { -; GENERIC-LABEL: test_lfence: -; GENERIC: # %bb.0: -; GENERIC-NEXT: lfence # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lfence: -; ATOM: # %bb.0: -; ATOM-NEXT: lfence # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lfence: -; SLM: # %bb.0: -; SLM-NEXT: lfence # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_lfence: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: lfence # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_lfence: -; SANDY: # %bb.0: -; SANDY-NEXT: lfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_lfence: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: lfence # sched: [2:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_lfence: -; HASWELL: # %bb.0: -; HASWELL-NEXT: lfence # sched: [2:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_lfence: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: lfence # sched: [2:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lfence: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: lfence # sched: [2:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_lfence: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: lfence # sched: [2:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lfence: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: lfence # sched: [2:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_lfence: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: lfence # sched: [2:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_lfence: -; SKX: # %bb.0: -; SKX-NEXT: lfence # sched: [2:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_lfence: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: lfence # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_lfence: -; BDVER2: # %bb.0: -; BDVER2-NEXT: lfence # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_lfence: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: lfence # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_lfence: -; BTVER2: # %bb.0: -; BTVER2-NEXT: lfence # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_lfence: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: lfence # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_lfence: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: lfence # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.sse2.lfence() - ret void -} -declare void @llvm.x86.sse2.lfence() nounwind readnone - -define void @test_mfence() { -; GENERIC-LABEL: test_mfence: -; GENERIC: # %bb.0: -; GENERIC-NEXT: mfence # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_mfence: -; ATOM: # %bb.0: -; ATOM-NEXT: mfence # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_mfence: -; SLM: # %bb.0: -; SLM-NEXT: mfence # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_mfence: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: mfence # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_mfence: -; SANDY: # %bb.0: -; SANDY-NEXT: mfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_mfence: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: mfence # sched: [2:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_mfence: -; HASWELL: # %bb.0: -; HASWELL-NEXT: mfence # sched: [2:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_mfence: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: mfence # sched: [2:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mfence: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: mfence # sched: [2:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_mfence: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: mfence # sched: [3:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mfence: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: mfence # sched: [3:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_mfence: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mfence # sched: [3:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_mfence: -; SKX: # %bb.0: -; SKX-NEXT: mfence # sched: [3:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_mfence: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: mfence # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_mfence: -; BDVER2: # %bb.0: -; BDVER2-NEXT: mfence # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_mfence: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: mfence # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_mfence: -; BTVER2: # %bb.0: -; BTVER2-NEXT: mfence # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_mfence: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: mfence # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_mfence: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: mfence # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.sse2.mfence() - ret void -} -declare void @llvm.x86.sse2.mfence() nounwind readnone - -define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) { -; GENERIC-LABEL: test_maskmovdqu: -; GENERIC: # %bb.0: -; GENERIC-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_maskmovdqu: -; ATOM: # %bb.0: -; ATOM-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_maskmovdqu: -; SLM: # %bb.0: -; SLM-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_maskmovdqu: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_maskmovdqu: -; SANDY: # %bb.0: -; SANDY-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_maskmovdqu: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_maskmovdqu: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_maskmovdqu: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_maskmovdqu: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_maskmovdqu: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_maskmovdqu: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_maskmovdqu: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_maskmovdqu: -; SKX: # %bb.0: -; SKX-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_maskmovdqu: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_maskmovdqu: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_maskmovdqu: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_maskmovdqu: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_maskmovdqu: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_maskmovdqu: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) - ret void -} -declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind - -define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_maxpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_maxpd: -; ATOM: # %bb.0: -; ATOM-NEXT: maxpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: maxpd (%rdi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_maxpd: -; SLM: # %bb.0: -; SLM-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: maxpd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_maxpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_maxpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_maxpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_maxpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_maxpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_maxpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_maxpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_maxpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_maxpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_maxpd: -; SKX: # %bb.0: -; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_maxpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_maxpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_maxpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_maxpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_maxpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_maxpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %1, <2 x double> %2) - ret <2 x double> %3 -} -declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone - -define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_maxsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_maxsd: -; ATOM: # %bb.0: -; ATOM-NEXT: maxsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: maxsd (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_maxsd: -; SLM: # %bb.0: -; SLM-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: maxsd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_maxsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_maxsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_maxsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_maxsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_maxsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_maxsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_maxsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_maxsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_maxsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_maxsd: -; SKX: # %bb.0: -; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_maxsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_maxsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_maxsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_maxsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_maxsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_maxsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %1, <2 x double> %2) - ret <2 x double> %3 -} -declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone - -define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_minpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_minpd: -; ATOM: # %bb.0: -; ATOM-NEXT: minpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: minpd (%rdi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_minpd: -; SLM: # %bb.0: -; SLM-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: minpd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_minpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_minpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_minpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_minpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_minpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_minpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_minpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_minpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_minpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_minpd: -; SKX: # %bb.0: -; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_minpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_minpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_minpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_minpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_minpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_minpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %1, <2 x double> %2) - ret <2 x double> %3 -} -declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone - -define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_minsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_minsd: -; ATOM: # %bb.0: -; ATOM-NEXT: minsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: minsd (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_minsd: -; SLM: # %bb.0: -; SLM-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: minsd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_minsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_minsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_minsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_minsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_minsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_minsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_minsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_minsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_minsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_minsd: -; SKX: # %bb.0: -; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_minsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_minsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_minsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_minsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_minsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_minsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %1, <2 x double> %2) - ret <2 x double> %3 -} -declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone - -define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_movapd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] -; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movapd: -; ATOM: # %bb.0: -; ATOM-NEXT: movapd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: addpd %xmm0, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movapd: -; SLM: # %bb.0: -; SLM-NEXT: movapd (%rdi), %xmm0 # sched: [3:1.00] -; SLM-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movapd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movapd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movapd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movapd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movapd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movapd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movapd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movapd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movapd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movapd: -; SKX: # %bb.0: -; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movapd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movapd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movapd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movapd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movapd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movapd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovapd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <2 x double>, <2 x double> *%a0, align 16 - %2 = fadd <2 x double> %1, %1 - store <2 x double> %2, <2 x double> *%a1, align 16 - ret void -} - -define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_movdqa: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] -; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movdqa: -; ATOM: # %bb.0: -; ATOM-NEXT: movdqa (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: paddq %xmm0, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movdqa: -; SLM: # %bb.0: -; SLM-NEXT: movdqa (%rdi), %xmm0 # sched: [3:1.00] -; SLM-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movdqa: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movdqa: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movdqa: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movdqa: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movdqa: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movdqa: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movdqa: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movdqa: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movdqa: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movdqa: -; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movdqa: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movdqa: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movdqa: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movdqa: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movdqa: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movdqa: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovdqa (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <2 x i64>, <2 x i64> *%a0, align 16 - %2 = add <2 x i64> %1, %1 - store <2 x i64> %2, <2 x i64> *%a1, align 16 - ret void -} - -define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_movdqu: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] -; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movdqu: -; ATOM: # %bb.0: -; ATOM-NEXT: movdqu (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: paddq %xmm0, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: movdqu %xmm0, (%rsi) # sched: [2:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movdqu: -; SLM: # %bb.0: -; SLM-NEXT: movdqu (%rdi), %xmm0 # sched: [3:1.00] -; SLM-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; SLM-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movdqu: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movdqu: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movdqu: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movdqu: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movdqu: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movdqu: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movdqu: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movdqu: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movdqu: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movdqu: -; SKX: # %bb.0: -; SKX-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movdqu: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movdqu: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movdqu: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movdqu: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movdqu: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movdqu: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovdqu (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <2 x i64>, <2 x i64> *%a0, align 1 - %2 = add <2 x i64> %1, %1 - store <2 x i64> %2, <2 x i64> *%a1, align 1 - ret void -} - -define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { -; GENERIC-LABEL: test_movd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movd %edi, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] -; GENERIC-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] -; GENERIC-NEXT: movd %xmm2, %eax # sched: [2:1.00] -; GENERIC-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movd: -; ATOM: # %bb.0: -; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00] -; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movd %xmm1, %eax # sched: [3:3.00] -; ATOM-NEXT: movd %edi, %xmm1 # sched: [1:1.00] -; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movd: -; SLM: # %bb.0: -; SLM-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [3:1.00] -; SLM-NEXT: movd %edi, %xmm1 # sched: [1:0.50] -; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; SLM-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] -; SLM-NEXT: movd %xmm2, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] -; SANDY-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] -; SANDY-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] -; SANDY-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] -; SANDY-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] -; HASWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] -; HASWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00] -; HASWELL-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] -; HASWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovd %xmm0, %eax # sched: [1:1.00] -; HASWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] -; BROADWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vmovd %xmm0, %eax # sched: [1:1.00] -; BROADWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] -; SKX-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33] -; SKX-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] -; SKX-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movd: -; SKX: # %bb.0: -; SKX-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] -; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; SKX-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [10:0.50] -; BDVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movd %xmm2, %eax # sched: [10:1.00] -; BDVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovd %edi, %xmm1 # sched: [10:0.50] -; BDVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vmovd %xmm0, %eax # sched: [10:1.00] -; BDVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [8:0.50] -; BTVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movd %xmm2, %eax # sched: [4:1.00] -; BTVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovd %edi, %xmm1 # sched: [8:0.50] -; BTVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovd %xmm0, %eax # sched: [4:1.00] -; BTVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: movd %edi, %xmm1 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vmovd %edi, %xmm1 # sched: [3:1.00] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vmovd %xmm1, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <4 x i32> undef, i32 %a1, i32 0 - %2 = load i32, i32 *%a2 - %3 = insertelement <4 x i32> undef, i32 %2, i32 0 - %4 = add <4 x i32> %a0, %1 - %5 = add <4 x i32> %a0, %3 - %6 = extractelement <4 x i32> %4, i32 0 - %7 = extractelement <4 x i32> %5, i32 0 - store i32 %6, i32* %a2 - ret i32 %7 -} - -define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { -; GENERIC-LABEL: test_movd_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] -; GENERIC-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] -; GENERIC-NEXT: movq %xmm2, %rax # sched: [2:1.00] -; GENERIC-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movd_64: -; ATOM: # %bb.0: -; ATOM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00] -; ATOM-NEXT: movq %rdi, %xmm2 # sched: [1:1.00] -; ATOM-NEXT: paddq %xmm0, %xmm1 # sched: [2:1.00] -; ATOM-NEXT: paddq %xmm0, %xmm2 # sched: [2:1.00] -; ATOM-NEXT: movq %xmm1, %rax # sched: [3:3.00] -; ATOM-NEXT: movq %xmm2, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movd_64: -; SLM: # %bb.0: -; SLM-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [3:1.00] -; SLM-NEXT: movq %rdi, %xmm1 # sched: [1:0.50] -; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] -; SLM-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] -; SLM-NEXT: movq %xmm2, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movd_64: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] -; SANDY-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] -; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] -; SANDY-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] -; SANDY-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movd_64: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; SANDY-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movd_64: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] -; HASWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] -; HASWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00] -; HASWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movd_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; HASWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovq %xmm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movd_64: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movd_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; BROADWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vmovq %xmm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movd_64: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movd_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movd_64: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] -; SKX-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] -; SKX-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] -; SKX-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movd_64: -; SKX: # %bb.0: -; SKX-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; SKX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; SKX-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movd_64: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; BDVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [10:0.50] -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movq %xmm2, %rax # sched: [10:1.00] -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movd_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovq %rdi, %xmm1 # sched: [10:0.50] -; BDVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vmovq %xmm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: vmovq %xmm1, (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movd_64: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00] -; BTVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [8:0.50] -; BTVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movq %xmm2, %rax # sched: [4:1.00] -; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movd_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovq %rdi, %xmm1 # sched: [8:0.50] -; BTVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovq %xmm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: vmovq %xmm1, (%rsi) # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movd_64: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: movq %rdi, %xmm1 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movd_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50] -; ZNVER1-NEXT: vmovq %rdi, %xmm1 # sched: [3:1.00] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vmovq %xmm1, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <2 x i64> undef, i64 %a1, i64 0 - %2 = load i64, i64 *%a2 - %3 = insertelement <2 x i64> undef, i64 %2, i64 0 - %4 = add <2 x i64> %a0, %1 - %5 = add <2 x i64> %a0, %3 - %6 = extractelement <2 x i64> %4, i64 0 - %7 = extractelement <2 x i64> %5, i64 0 - store i64 %6, i64* %a2 - ret i64 %7 -} - -define <2 x double> @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { -; GENERIC-LABEL: test_movhpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movhpd: -; ATOM: # %bb.0: -; ATOM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm2 # sched: [6:3.00] -; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: movhpd %xmm2, (%rdi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movhpd: -; SLM: # %bb.0: -; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00] -; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movhpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movhpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movhpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movhpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movhpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movhpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movhpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movhpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movhpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movhpd: -; SKX: # %bb.0: -; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movhpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movhpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movhpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movhpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movhpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movhpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast x86_mmx* %a2 to double* - %2 = load double, double *%1, align 8 - %3 = insertelement <2 x double> %a1, double %2, i32 1 - %4 = fadd <2 x double> %a0, %3 - %5 = extractelement <2 x double> %4, i32 1 - store double %5, double* %1 - ret <2 x double> %3 -} - -define <2 x double> @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { -; GENERIC-LABEL: test_movlpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movlpd: -; ATOM: # %bb.0: -; ATOM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm2 # sched: [6:3.00] -; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: movlpd %xmm2, (%rdi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movlpd: -; SLM: # %bb.0: -; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00] -; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movlpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movlpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movlpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movlpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movlpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movlpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movlpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movlpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movlpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movlpd: -; SKX: # %bb.0: -; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movlpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movlpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movlpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movlpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movlpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movlpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast x86_mmx* %a2 to double* - %2 = load double, double *%1, align 8 - %3 = insertelement <2 x double> %a1, double %2, i32 0 - %4 = fadd <2 x double> %a0, %3 - %5 = extractelement <2 x double> %4, i32 0 - store double %5, double* %1 - ret <2 x double> %3 -} - -define i32 @test_movmskpd(<2 x double> %a0) { -; GENERIC-LABEL: test_movmskpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movmskpd: -; ATOM: # %bb.0: -; ATOM-NEXT: movmskpd %xmm0, %eax # sched: [3:3.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movmskpd: -; SLM: # %bb.0: -; SLM-NEXT: movmskpd %xmm0, %eax # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movmskpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movmskpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movmskpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movmskpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movmskpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movmskpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movmskpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movmskpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movmskpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movmskpd: -; SKX: # %bb.0: -; SKX-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movmskpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movmskpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movmskpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movmskpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movmskpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movmskpd %xmm0, %eax # sched: [1:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movmskpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovmskpd %xmm0, %eax # sched: [1:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) - ret i32 %1 -} -declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone - -define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_movntdqa: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movntdqa: -; ATOM: # %bb.0: -; ATOM-NEXT: paddq %xmm0, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movntdqa: -; SLM: # %bb.0: -; SLM-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; SLM-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movntdqa: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movntdqa: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movntdqa: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movntdqa: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movntdqa: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movntdqa: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movntdqa: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movntdqa: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movntdqa: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movntdqa: -; SKX: # %bb.0: -; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movntdqa: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movntdqa: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movntdqa: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movntdqa: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movntdqa: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movntdqa: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = add <2 x i64> %a0, %a0 - store <2 x i64> %1, <2 x i64> *%a1, align 16, !nontemporal !0 - ret void -} - -define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_movntpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movntpd: -; ATOM: # %bb.0: -; ATOM-NEXT: addpd %xmm0, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movntpd: -; SLM: # %bb.0: -; SLM-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movntpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movntpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movntpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movntpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movntpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movntpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movntpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movntpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movntpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movntpd: -; SKX: # %bb.0: -; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movntpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movntpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movntpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movntpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movntpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movntpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fadd <2 x double> %a0, %a0 - store <2 x double> %1, <2 x double> *%a1, align 16, !nontemporal !0 - ret void -} - -define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { -; GENERIC-LABEL: test_movq_mem: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movq_mem: -; ATOM: # %bb.0: -; ATOM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00] -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movq_mem: -; SLM: # %bb.0: -; SLM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [3:1.00] -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movq_mem: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movq_mem: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movq_mem: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movq_mem: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movq_mem: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movq_mem: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movq_mem: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movq_mem: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movq_mem: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movq_mem: -; SKX: # %bb.0: -; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movq_mem: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movq_mem: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movq_mem: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movq_mem: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movq_mem: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movq_mem: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vmovq %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64* %a1, align 1 - %2 = insertelement <2 x i64> zeroinitializer, i64 %1, i32 0 - %3 = add <2 x i64> %a0, %2 - %4 = extractelement <2 x i64> %3, i32 0 - store i64 %4, i64 *%a1, align 1 - ret <2 x i64> %3 -} - -define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) { -; GENERIC-LABEL: test_movq_reg: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movq_reg: -; ATOM: # %bb.0: -; ATOM-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movq_reg: -; SLM: # %bb.0: -; SLM-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movq_reg: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movq_reg: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; SANDY-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movq_reg: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movq_reg: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; HASWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movq_reg: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movq_reg: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; BROADWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movq_reg: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movq_reg: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; SKYLAKE-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movq_reg: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movq_reg: -; SKX: # %bb.0: -; SKX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movq_reg: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [2:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movq_reg: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [2:0.50] -; BDVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movq_reg: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movq_reg: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] -; BTVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movq_reg: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movq_reg: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25] -; ZNVER1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> - %2 = add <2 x i64> %a1, %1 - ret <2 x i64> %2 -} - -define void @test_movsd_mem(double* %a0, double* %a1) { -; GENERIC-LABEL: test_movsd_mem: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] -; GENERIC-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movsd_mem: -; ATOM: # %bb.0: -; ATOM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [1:1.00] -; ATOM-NEXT: addsd %xmm0, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movsd_mem: -; SLM: # %bb.0: -; SLM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [3:1.00] -; SLM-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movsd_mem: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] -; SANDY-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movsd_mem: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] -; SANDY-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movsd_mem: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; HASWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movsd_mem: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; HASWELL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movsd_mem: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; BROADWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movsd_mem: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; BROADWELL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movsd_mem: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKYLAKE-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movsd_mem: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKYLAKE-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movsd_mem: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movsd_mem: -; SKX: # %bb.0: -; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movsd_mem: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; BDVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movsd_mem: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; BDVER2-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movsd_mem: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] -; BTVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movsd_mem: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] -; BTVER2-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movsd_mem: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movsd_mem: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50] -; ZNVER1-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load double, double* %a0, align 1 - %2 = fadd double %1, %1 - store double %2, double *%a1, align 1 - ret void -} - -define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) { -; GENERIC-LABEL: test_movsd_reg: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movsd_reg: -; ATOM: # %bb.0: -; ATOM-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movsd_reg: -; SLM: # %bb.0: -; SLM-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movsd_reg: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movsd_reg: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movsd_reg: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movsd_reg: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movsd_reg: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movsd_reg: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movsd_reg: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movsd_reg: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movsd_reg: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movsd_reg: -; SKX: # %bb.0: -; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movsd_reg: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [2:0.50] -; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movsd_reg: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movsd_reg: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50] -; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movsd_reg: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movsd_reg: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50] -; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movsd_reg: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> - ret <2 x double> %1 -} - -define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_movupd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] -; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movupd: -; ATOM: # %bb.0: -; ATOM-NEXT: movupd (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: addpd %xmm0, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: movupd %xmm0, (%rsi) # sched: [2:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movupd: -; SLM: # %bb.0: -; SLM-NEXT: movupd (%rdi), %xmm0 # sched: [3:1.00] -; SLM-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movupd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movupd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movupd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movupd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movupd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movupd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movupd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movupd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movupd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movupd: -; SKX: # %bb.0: -; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movupd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movupd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movupd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movupd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movupd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movupd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovupd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <2 x double>, <2 x double> *%a0, align 1 - %2 = fadd <2 x double> %1, %1 - store <2 x double> %2, <2 x double> *%a1, align 1 - ret void -} - -define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_mulpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_mulpd: -; ATOM: # %bb.0: -; ATOM-NEXT: mulpd %xmm1, %xmm0 # sched: [9:4.50] -; ATOM-NEXT: mulpd (%rdi), %xmm0 # sched: [10:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_mulpd: -; SLM: # %bb.0: -; SLM-NEXT: mulpd %xmm1, %xmm0 # sched: [5:2.00] -; SLM-NEXT: mulpd (%rdi), %xmm0 # sched: [8:2.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_mulpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_mulpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_mulpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_mulpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_mulpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50] -; BROADWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [8:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mulpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BROADWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_mulpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mulpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_mulpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_mulpd: -; SKX: # %bb.0: -; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_mulpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_mulpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_mulpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:2.00] -; BTVER2-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [9:2.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_mulpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:2.00] -; BTVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_mulpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50] -; ZNVER1-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_mulpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; ZNVER1-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fmul <2 x double> %a0, %a1 - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = fmul <2 x double> %1, %2 - ret <2 x double> %3 -} - -define double @test_mulsd(double %a0, double %a1, double *%a2) { -; GENERIC-LABEL: test_mulsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_mulsd: -; ATOM: # %bb.0: -; ATOM-NEXT: mulsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: mulsd (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_mulsd: -; SLM: # %bb.0: -; SLM-NEXT: mulsd %xmm1, %xmm0 # sched: [5:2.00] -; SLM-NEXT: mulsd (%rdi), %xmm0 # sched: [8:2.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_mulsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_mulsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_mulsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_mulsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_mulsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50] -; BROADWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [8:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mulsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BROADWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_mulsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mulsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_mulsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_mulsd: -; SKX: # %bb.0: -; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_mulsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_mulsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_mulsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:2.00] -; BTVER2-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:2.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_mulsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:2.00] -; BTVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_mulsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50] -; ZNVER1-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_mulsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; ZNVER1-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fmul double %a0, %a1 - %2 = load double, double *%a2, align 8 - %3 = fmul double %1, %2 - ret double %3 -} - -define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_orpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_orpd: -; ATOM: # %bb.0: -; ATOM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: orpd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_orpd: -; SLM: # %bb.0: -; SLM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: orpd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_orpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_orpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_orpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_orpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_orpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_orpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_orpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_orpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_orpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_orpd: -; SKX: # %bb.0: -; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_orpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_orpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_orpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_orpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_orpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_orpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <2 x double> %a0 to <4 x i32> - %2 = bitcast <2 x double> %a1 to <4 x i32> - %3 = or <4 x i32> %1, %2 - %4 = load <2 x double>, <2 x double> *%a2, align 16 - %5 = bitcast <2 x double> %4 to <4 x i32> - %6 = or <4 x i32> %3, %5 - %7 = bitcast <4 x i32> %6 to <2 x double> - %8 = fadd <2 x double> %a1, %7 - ret <2 x double> %8 -} - -define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_packssdw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_packssdw: -; ATOM: # %bb.0: -; ATOM-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] -; ATOM-NEXT: packssdw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_packssdw: -; SLM: # %bb.0: -; SLM-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: packssdw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_packssdw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_packssdw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_packssdw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_packssdw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_packssdw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_packssdw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_packssdw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_packssdw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_packssdw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] -; SKX-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_packssdw: -; SKX: # %bb.0: -; SKX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_packssdw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_packssdw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_packssdw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_packssdw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_packssdw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_packssdw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) - %2 = bitcast <8 x i16> %1 to <4 x i32> - %3 = load <4 x i32>, <4 x i32> *%a2, align 16 - %4 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %2, <4 x i32> %3) - ret <8 x i16> %4 -} -declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone - -define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_packsswb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_packsswb: -; ATOM: # %bb.0: -; ATOM-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] -; ATOM-NEXT: packsswb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_packsswb: -; SLM: # %bb.0: -; SLM-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: packsswb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_packsswb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_packsswb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_packsswb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_packsswb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_packsswb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_packsswb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_packsswb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_packsswb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_packsswb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] -; SKX-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_packsswb: -; SKX: # %bb.0: -; SKX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_packsswb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_packsswb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_packsswb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_packsswb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_packsswb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_packsswb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) - %2 = bitcast <16 x i8> %1 to <8 x i16> - %3 = load <8 x i16>, <8 x i16> *%a2, align 16 - %4 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %2, <8 x i16> %3) - ret <16 x i8> %4 -} -declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone - -define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_packuswb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_packuswb: -; ATOM: # %bb.0: -; ATOM-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] -; ATOM-NEXT: packuswb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_packuswb: -; SLM: # %bb.0: -; SLM-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: packuswb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_packuswb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_packuswb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_packuswb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_packuswb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_packuswb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_packuswb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_packuswb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_packuswb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_packuswb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] -; SKX-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_packuswb: -; SKX: # %bb.0: -; SKX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_packuswb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_packuswb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_packuswb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_packuswb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_packuswb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_packuswb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) - %2 = bitcast <16 x i8> %1 to <8 x i16> - %3 = load <8 x i16>, <8 x i16> *%a2, align 16 - %4 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %2, <8 x i16> %3) - ret <16 x i8> %4 -} -declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone - -define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_paddb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddb: -; ATOM: # %bb.0: -; ATOM-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: paddb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddb: -; SLM: # %bb.0: -; SLM-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_paddb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_paddb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_paddb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddb: -; SKX: # %bb.0: -; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_paddb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_paddb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = add <16 x i8> %a0, %a1 - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = add <16 x i8> %1, %2 - ret <16 x i8> %3 -} - -define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_paddd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddd: -; ATOM: # %bb.0: -; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: paddd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddd: -; SLM: # %bb.0: -; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_paddd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_paddd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_paddd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddd: -; SKX: # %bb.0: -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_paddd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_paddd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = add <4 x i32> %a0, %a1 - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = add <4 x i32> %1, %2 - ret <4 x i32> %3 -} - -define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_paddq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddq: -; ATOM: # %bb.0: -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: paddq (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddq: -; SLM: # %bb.0: -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddq (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_paddq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_paddq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_paddq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddq: -; SKX: # %bb.0: -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_paddq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_paddq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = add <2 x i64> %a0, %a1 - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = add <2 x i64> %1, %2 - ret <2 x i64> %3 -} - -define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_paddsb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddsb: -; ATOM: # %bb.0: -; ATOM-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: paddsb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddsb: -; SLM: # %bb.0: -; SLM-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddsb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_paddsb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddsb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddsb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_paddsb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddsb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddsb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_paddsb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddsb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddsb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddsb: -; SKX: # %bb.0: -; SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddsb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddsb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_paddsb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddsb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddsb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_paddsb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone - -define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_paddsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddsw: -; ATOM: # %bb.0: -; ATOM-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: paddsw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddsw: -; SLM: # %bb.0: -; SLM-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddsw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_paddsw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddsw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddsw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_paddsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddsw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_paddsw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddsw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddsw: -; SKX: # %bb.0: -; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddsw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_paddsw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddsw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_paddsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone - -define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_paddusb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddusb: -; ATOM: # %bb.0: -; ATOM-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: paddusb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddusb: -; SLM: # %bb.0: -; SLM-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddusb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_paddusb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddusb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddusb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_paddusb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddusb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddusb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_paddusb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddusb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddusb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddusb: -; SKX: # %bb.0: -; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddusb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddusb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_paddusb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddusb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddusb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_paddusb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone - -define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_paddusw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddusw: -; ATOM: # %bb.0: -; ATOM-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: paddusw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddusw: -; SLM: # %bb.0: -; SLM-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddusw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_paddusw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddusw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddusw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_paddusw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddusw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddusw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_paddusw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddusw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddusw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddusw: -; SKX: # %bb.0: -; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddusw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddusw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_paddusw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddusw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddusw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_paddusw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone - -define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_paddw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddw: -; ATOM: # %bb.0: -; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: paddw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddw: -; SLM: # %bb.0: -; SLM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_paddw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_paddw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_paddw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddw: -; SKX: # %bb.0: -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_paddw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_paddw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = add <8 x i16> %a0, %a1 - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = add <8 x i16> %1, %2 - ret <8 x i16> %3 -} - -define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_pand: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pand: -; ATOM: # %bb.0: -; ATOM-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: pand (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pand: -; SLM: # %bb.0: -; SLM-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pand (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pand: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pand: -; SANDY: # %bb.0: -; SANDY-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pand: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pand: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pand: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pand: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pand: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pand: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pand: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pand: -; SKX: # %bb.0: -; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pand: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pand: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pand: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pand: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pand: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pand (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pand: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = and <2 x i64> %a0, %a1 - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = and <2 x i64> %1, %2 - %4 = add <2 x i64> %3, %a1 - ret <2 x i64> %4 -} - -define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_pandn: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] -; GENERIC-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pandn: -; ATOM: # %bb.0: -; ATOM-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: pandn (%rdi), %xmm1 # sched: [1:1.00] -; ATOM-NEXT: paddq %xmm0, %xmm1 # sched: [2:1.00] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pandn: -; SLM: # %bb.0: -; SLM-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: pandn (%rdi), %xmm1 # sched: [4:1.00] -; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pandn: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] -; SANDY-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pandn: -; SANDY: # %bb.0: -; SANDY-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pandn: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] -; HASWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] -; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pandn: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pandn: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pandn: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [6:0.50] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pandn: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pandn: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pandn: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] -; SKX-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] -; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] -; SKX-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pandn: -; SKX: # %bb.0: -; SKX-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pandn: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pandn: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pandn: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pandn: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [6:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pandn: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pandn: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = xor <2 x i64> %a0, - %2 = and <2 x i64> %a1, %1 - %3 = load <2 x i64>, <2 x i64> *%a2, align 16 - %4 = xor <2 x i64> %2, - %5 = and <2 x i64> %3, %4 - %6 = add <2 x i64> %2, %5 - ret <2 x i64> %6 -} - -define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pavgb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pavgb: -; ATOM: # %bb.0: -; ATOM-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: pavgb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pavgb: -; SLM: # %bb.0: -; SLM-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pavgb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pavgb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pavgb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pavgb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pavgb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pavgb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pavgb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pavgb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pavgb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pavgb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pavgb: -; SKX: # %bb.0: -; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pavgb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pavgb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pavgb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pavgb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pavgb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pavgb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = zext <16 x i8> %a0 to <16 x i16> - %2 = zext <16 x i8> %a1 to <16 x i16> - %3 = add <16 x i16> %1, %2 - %4 = add <16 x i16> %3, - %5 = lshr <16 x i16> %4, - %6 = trunc <16 x i16> %5 to <16 x i8> - %7 = load <16 x i8>, <16 x i8> *%a2, align 16 - %8 = zext <16 x i8> %6 to <16 x i16> - %9 = zext <16 x i8> %7 to <16 x i16> - %10 = add <16 x i16> %8, %9 - %11 = add <16 x i16> %10, - %12 = lshr <16 x i16> %11, - %13 = trunc <16 x i16> %12 to <16 x i8> - ret <16 x i8> %13 -} - -define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pavgw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pavgw: -; ATOM: # %bb.0: -; ATOM-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: pavgw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pavgw: -; SLM: # %bb.0: -; SLM-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pavgw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pavgw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pavgw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pavgw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pavgw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pavgw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pavgw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pavgw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pavgw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pavgw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pavgw: -; SKX: # %bb.0: -; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pavgw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pavgw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pavgw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pavgw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pavgw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pavgw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = zext <8 x i16> %a0 to <8 x i32> - %2 = zext <8 x i16> %a1 to <8 x i32> - %3 = add <8 x i32> %1, %2 - %4 = add <8 x i32> %3, - %5 = lshr <8 x i32> %4, - %6 = trunc <8 x i32> %5 to <8 x i16> - %7 = load <8 x i16>, <8 x i16> *%a2, align 16 - %8 = zext <8 x i16> %6 to <8 x i32> - %9 = zext <8 x i16> %7 to <8 x i32> - %10 = add <8 x i32> %8, %9 - %11 = add <8 x i32> %10, - %12 = lshr <8 x i32> %11, - %13 = trunc <8 x i32> %12 to <8 x i16> - ret <8 x i16> %13 -} - -define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pcmpeqb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpeqb: -; ATOM: # %bb.0: -; ATOM-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpeqb: -; SLM: # %bb.0: -; SLM-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpeqb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpeqb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpeqb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpeqb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpeqb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpeqb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpeqb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpeqb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpeqb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; SKX-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpeqb: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpeqb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpeqb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpeqb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpeqb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpeqb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpeqb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp eq <16 x i8> %a0, %a1 - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = icmp eq <16 x i8> %a0, %2 - %4 = or <16 x i1> %1, %3 - %5 = sext <16 x i1> %4 to <16 x i8> - ret <16 x i8> %5 -} - -define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_pcmpeqd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpeqd: -; ATOM: # %bb.0: -; ATOM-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpeqd: -; SLM: # %bb.0: -; SLM-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpeqd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpeqd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpeqd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpeqd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpeqd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpeqd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpeqd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpeqd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpeqd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; SKX-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpeqd: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpeqd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpeqd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpeqd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpeqd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpeqd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpeqd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp eq <4 x i32> %a0, %a1 - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = icmp eq <4 x i32> %a0, %2 - %4 = or <4 x i1> %1, %3 - %5 = sext <4 x i1> %4 to <4 x i32> - ret <4 x i32> %5 -} - -define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pcmpeqw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpeqw: -; ATOM: # %bb.0: -; ATOM-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpeqw: -; SLM: # %bb.0: -; SLM-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpeqw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpeqw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpeqw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpeqw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpeqw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpeqw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpeqw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpeqw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpeqw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; SKX-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpeqw: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpeqw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpeqw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpeqw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpeqw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpeqw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpeqw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp eq <8 x i16> %a0, %a1 - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = icmp eq <8 x i16> %a0, %2 - %4 = or <8 x i1> %1, %3 - %5 = sext <8 x i1> %4 to <8 x i16> - ret <8 x i16> %5 -} - -define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pcmpgtb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; GENERIC-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; GENERIC-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpgtb: -; ATOM: # %bb.0: -; ATOM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpgtb: -; SLM: # %bb.0: -; SLM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; SLM-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpgtb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SANDY-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; SANDY-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpgtb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpgtb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; HASWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpgtb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpgtb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpgtb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpgtb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpgtb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpgtb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; SKX-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpgtb: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpgtb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpgtb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpgtb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpgtb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpgtb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpgtb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp sgt <16 x i8> %a0, %a1 - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = icmp sgt <16 x i8> %a0, %2 - %4 = or <16 x i1> %1, %3 - %5 = sext <16 x i1> %4 to <16 x i8> - ret <16 x i8> %5 -} - -define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_pcmpgtd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; GENERIC-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; GENERIC-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpgtd: -; ATOM: # %bb.0: -; ATOM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpgtd: -; SLM: # %bb.0: -; SLM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; SLM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpgtd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SANDY-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; SANDY-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpgtd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpgtd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; HASWELL-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpgtd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpgtd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpgtd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpgtd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpgtd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpgtd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; SKX-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpgtd: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpgtd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpgtd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpgtd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpgtd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpgtd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpgtd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp sgt <4 x i32> %a0, %a1 - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = icmp eq <4 x i32> %a0, %2 - %4 = or <4 x i1> %1, %3 - %5 = sext <4 x i1> %4 to <4 x i32> - ret <4 x i32> %5 -} - -define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pcmpgtw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; GENERIC-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; GENERIC-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpgtw: -; ATOM: # %bb.0: -; ATOM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpgtw: -; SLM: # %bb.0: -; SLM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; SLM-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpgtw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SANDY-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; SANDY-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpgtw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpgtw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; HASWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpgtw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpgtw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpgtw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpgtw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpgtw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpgtw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; SKX-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpgtw: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpgtw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpgtw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpgtw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpgtw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpgtw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpgtw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp sgt <8 x i16> %a0, %a1 - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = icmp sgt <8 x i16> %a0, %2 - %4 = or <8 x i1> %1, %3 - %5 = sext <8 x i1> %4 to <8 x i16> - ret <8 x i16> %5 -} - -define i16 @test_pextrw(<8 x i16> %a0) { -; GENERIC-LABEL: test_pextrw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] -; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pextrw: -; ATOM: # %bb.0: -; ATOM-NEXT: pextrw $6, %xmm0, %eax # sched: [4:2.00] -; ATOM-NEXT: # kill: def $ax killed $ax killed $eax -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pextrw: -; SLM: # %bb.0: -; SLM-NEXT: pextrw $6, %xmm0, %eax # sched: [1:1.00] -; SLM-NEXT: # kill: def $ax killed $ax killed $eax -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pextrw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] -; SANDY-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pextrw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] -; SANDY-NEXT: # kill: def $ax killed $ax killed $eax -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pextrw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00] -; HASWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pextrw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00] -; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pextrw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00] -; BROADWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pextrw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00] -; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pextrw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pextrw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pextrw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] -; SKX-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pextrw: -; SKX: # %bb.0: -; SKX-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] -; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pextrw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [13:1.00] -; BDVER2-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pextrw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [13:1.00] -; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pextrw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] -; BTVER2-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pextrw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] -; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pextrw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:2.00] -; ZNVER1-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pextrw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:2.00] -; ZNVER1-NEXT: # kill: def $ax killed $ax killed $eax -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = extractelement <8 x i16> %a0, i32 6 - ret i16 %1 -} - -define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) { -; GENERIC-LABEL: test_pinsrw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pinsrw: -; ATOM: # %bb.0: -; ATOM-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:1.00] -; ATOM-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pinsrw: -; SLM: # %bb.0: -; SLM-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:1.00] -; SLM-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pinsrw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pinsrw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pinsrw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] -; HASWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pinsrw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pinsrw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] -; BROADWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pinsrw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pinsrw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] -; SKYLAKE-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pinsrw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; SKYLAKE-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pinsrw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] -; SKX-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pinsrw: -; SKX: # %bb.0: -; SKX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; SKX-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pinsrw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pinsrw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pinsrw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [7:0.50] -; BTVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pinsrw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [7:0.50] -; BTVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pinsrw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pinsrw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <8 x i16> %a0, i16 %a1, i32 1 - %2 = load i16, i16 *%a2 - %3 = insertelement <8 x i16> %1, i16 %2, i32 3 - ret <8 x i16> %3 -} - -define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pmaddwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmaddwd: -; ATOM: # %bb.0: -; ATOM-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmaddwd: -; SLM: # %bb.0: -; SLM-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmaddwd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmaddwd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmaddwd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmaddwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmaddwd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaddwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmaddwd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaddwd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmaddwd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaddwd: -; SKX: # %bb.0: -; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmaddwd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmaddwd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmaddwd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmaddwd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmaddwd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmaddwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) - %2 = bitcast <4 x i32> %1 to <8 x i16> - %3 = load <8 x i16>, <8 x i16> *%a2, align 16 - %4 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %2, <8 x i16> %3) - ret <4 x i32> %4 -} -declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone - -define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pmaxsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmaxsw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: pmaxsw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmaxsw: -; SLM: # %bb.0: -; SLM-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pmaxsw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmaxsw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmaxsw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmaxsw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmaxsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmaxsw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmaxsw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmaxsw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxsw: -; SKX: # %bb.0: -; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmaxsw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmaxsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmaxsw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmaxsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmaxsw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmaxsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone - -define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pmaxub: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmaxub: -; ATOM: # %bb.0: -; ATOM-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: pmaxub (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmaxub: -; SLM: # %bb.0: -; SLM-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pmaxub (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmaxub: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmaxub: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmaxub: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmaxub: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmaxub: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxub: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmaxub: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxub: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmaxub: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxub: -; SKX: # %bb.0: -; SKX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmaxub: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmaxub: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmaxub: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmaxub: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmaxub: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmaxub: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone - -define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pminsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pminsw: -; ATOM: # %bb.0: -; ATOM-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: pminsw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pminsw: -; SLM: # %bb.0: -; SLM-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pminsw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pminsw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pminsw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pminsw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pminsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pminsw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pminsw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pminsw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminsw: -; SKX: # %bb.0: -; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pminsw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pminsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pminsw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pminsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pminsw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pminsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone - -define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pminub: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pminub: -; ATOM: # %bb.0: -; ATOM-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: pminub (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pminub: -; SLM: # %bb.0: -; SLM-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pminub (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pminub: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pminub: -; SANDY: # %bb.0: -; SANDY-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pminub: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pminub: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pminub: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminub: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pminub: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminub: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pminub: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminub: -; SKX: # %bb.0: -; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pminub: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pminub: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pminub: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pminub: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pminub: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pminub: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone - -define i32 @test_pmovmskb(<16 x i8> %a0) { -; GENERIC-LABEL: test_pmovmskb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmovmskb: -; ATOM: # %bb.0: -; ATOM-NEXT: pmovmskb %xmm0, %eax # sched: [3:3.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmovmskb: -; SLM: # %bb.0: -; SLM-NEXT: pmovmskb %xmm0, %eax # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmovmskb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmovmskb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmovmskb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmovmskb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmovmskb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovmskb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmovmskb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovmskb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmovmskb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovmskb: -; SKX: # %bb.0: -; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmovmskb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [13:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmovmskb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [13:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmovmskb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmovmskb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmovmskb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [1:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmovmskb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovmskb %xmm0, %eax # sched: [1:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) - ret i32 %1 -} -declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone - -define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pmulhuw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmulhuw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: pmulhuw (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmulhuw: -; SLM: # %bb.0: -; SLM-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmulhuw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmulhuw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmulhuw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmulhuw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmulhuw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmulhuw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmulhuw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmulhuw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmulhuw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmulhuw: -; SKX: # %bb.0: -; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmulhuw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmulhuw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmulhuw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmulhuw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmulhuw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmulhuw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone - -define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pmulhw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmulhw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: pmulhw (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmulhw: -; SLM: # %bb.0: -; SLM-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmulhw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmulhw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmulhw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmulhw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmulhw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmulhw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmulhw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmulhw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmulhw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmulhw: -; SKX: # %bb.0: -; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmulhw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmulhw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmulhw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmulhw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmulhw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmulhw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone - -define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pmullw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmullw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmullw %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: pmullw (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmullw: -; SLM: # %bb.0: -; SLM-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmullw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmullw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmullw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmullw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmullw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmullw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmullw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmullw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmullw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmullw: -; SKX: # %bb.0: -; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmullw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmullw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmullw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmullw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmullw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmullw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = mul <8 x i16> %a0, %a1 - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = mul <8 x i16> %1, %2 - ret <8 x i16> %3 -} - -define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_pmuludq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmuludq: -; ATOM: # %bb.0: -; ATOM-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: pmuludq (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmuludq: -; SLM: # %bb.0: -; SLM-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmuludq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmuludq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmuludq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmuludq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmuludq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmuludq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmuludq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmuludq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmuludq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmuludq: -; SKX: # %bb.0: -; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmuludq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmuludq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmuludq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmuludq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmuludq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmuludq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) - %2 = bitcast <2 x i64> %1 to <4 x i32> - %3 = load <4 x i32>, <4 x i32> *%a2, align 16 - %4 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %2, <4 x i32> %3) - ret <2 x i64> %4 -} -declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone - -define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_por: -; GENERIC: # %bb.0: -; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_por: -; ATOM: # %bb.0: -; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: por (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_por: -; SLM: # %bb.0: -; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: por (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_por: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_por: -; SANDY: # %bb.0: -; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_por: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_por: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_por: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_por: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_por: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_por: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_por: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_por: -; SKX: # %bb.0: -; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_por: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_por: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_por: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_por: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_por: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: por (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_por: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = or <2 x i64> %a0, %a1 - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = or <2 x i64> %1, %2 - %4 = add <2 x i64> %3, %a1 - ret <2 x i64> %4 -} - -define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_psadbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psadbw: -; ATOM: # %bb.0: -; ATOM-NEXT: psadbw %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: psadbw (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psadbw: -; SLM: # %bb.0: -; SLM-NEXT: psadbw %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: psadbw (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psadbw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psadbw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psadbw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psadbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psadbw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psadbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psadbw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psadbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psadbw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00] -; SKX-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psadbw: -; SKX: # %bb.0: -; SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SKX-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psadbw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [4:0.50] -; BDVER2-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psadbw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; BDVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psadbw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psadbw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psadbw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psadbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) - %2 = bitcast <2 x i64> %1 to <16 x i8> - %3 = load <16 x i8>, <16 x i8> *%a2, align 16 - %4 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %2, <16 x i8> %3) - ret <2 x i64> %4 -} -declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone - -define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { -; GENERIC-LABEL: test_pshufd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50] -; GENERIC-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pshufd: -; ATOM: # %bb.0: -; ATOM-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] -; ATOM-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [1:1.00] -; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pshufd: -; SLM: # %bb.0: -; SLM-NEXT: pshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [4:1.00] -; SLM-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pshufd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50] -; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pshufd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50] -; SANDY-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pshufd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] -; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pshufd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; HASWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pshufd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] -; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pshufd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; BROADWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pshufd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pshufd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pshufd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] -; SKX-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pshufd: -; SKX: # %bb.0: -; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pshufd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [2:0.50] -; BDVER2-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pshufd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50] -; BDVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [2:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pshufd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50] -; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pshufd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] -; BTVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pshufd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.25] -; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pshufd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50] -; ZNVER1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.25] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> - %2 = load <4 x i32>, <4 x i32> *%a1, align 16 - %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> - %4 = add <4 x i32> %1, %3 - ret <4 x i32> %4 -} - -define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { -; GENERIC-LABEL: test_pshufhw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] -; GENERIC-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] -; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pshufhw: -; ATOM: # %bb.0: -; ATOM-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; ATOM-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00] -; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pshufhw: -; SLM: # %bb.0: -; SLM-NEXT: pshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [4:1.00] -; SLM-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pshufhw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] -; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] -; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pshufhw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] -; SANDY-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pshufhw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] -; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pshufhw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; HASWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] -; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pshufhw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pshufhw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] -; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pshufhw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pshufhw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] -; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pshufhw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] -; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pshufhw: -; SKX: # %bb.0: -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pshufhw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [2:0.50] -; BDVER2-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pshufhw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] -; BDVER2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [2:0.50] -; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pshufhw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] -; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] -; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pshufhw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] -; BTVER2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] -; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pshufhw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25] -; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pshufhw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50] -; ZNVER1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25] -; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> - %2 = load <8 x i16>, <8 x i16> *%a1, align 16 - %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> - %4 = add <8 x i16> %1, %3 - ret <8 x i16> %4 -} - -define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { -; GENERIC-LABEL: test_pshuflw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] -; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pshuflw: -; ATOM: # %bb.0: -; ATOM-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; ATOM-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00] -; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pshuflw: -; SLM: # %bb.0: -; SLM-NEXT: pshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [4:1.00] -; SLM-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pshuflw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] -; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] -; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pshuflw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] -; SANDY-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pshuflw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] -; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pshuflw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; HASWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] -; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pshuflw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pshuflw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] -; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pshuflw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pshuflw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] -; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pshuflw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] -; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pshuflw: -; SKX: # %bb.0: -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pshuflw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [2:0.50] -; BDVER2-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pshuflw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] -; BDVER2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [2:0.50] -; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pshuflw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] -; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] -; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pshuflw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] -; BTVER2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] -; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pshuflw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25] -; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pshuflw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50] -; ZNVER1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25] -; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> - %2 = load <8 x i16>, <8 x i16> *%a1, align 16 - %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> - %4 = add <8 x i16> %1, %3 - ret <8 x i16> %4 -} - -define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_pslld: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: pslld $2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pslld: -; ATOM: # %bb.0: -; ATOM-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: pslld (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: pslld $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pslld: -; SLM: # %bb.0: -; SLM-NEXT: pslld %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: pslld (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: pslld $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pslld: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pslld: -; SANDY: # %bb.0: -; SANDY-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pslld: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pslld: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pslld: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pslld: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pslld: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pslld: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pslld: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pslld: -; SKX: # %bb.0: -; SKX-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pslld: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pslld: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pslld: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pslld: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pslld: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pslld: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %1, <4 x i32> %2) - %4 = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %3, i32 2) - ret <4 x i32> %4 -} -declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone -declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone - -define <4 x i32> @test_pslldq(<4 x i32> %a0) { -; GENERIC-LABEL: test_pslldq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pslldq: -; ATOM: # %bb.0: -; ATOM-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pslldq: -; SLM: # %bb.0: -; SLM-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pslldq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pslldq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pslldq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pslldq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pslldq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pslldq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pslldq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pslldq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pslldq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pslldq: -; SKX: # %bb.0: -; SKX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pslldq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pslldq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pslldq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pslldq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pslldq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pslldq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> - ret <4 x i32> %1 -} - -define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_psllq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: psllq $2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psllq: -; ATOM: # %bb.0: -; ATOM-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psllq (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: psllq $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psllq: -; SLM: # %bb.0: -; SLM-NEXT: psllq %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: psllq (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: psllq $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psllq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psllq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psllq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psllq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psllq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psllq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psllq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psllq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psllq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psllq: -; SKX: # %bb.0: -; SKX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psllq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psllq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psllq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psllq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psllq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psllq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %1, <2 x i64> %2) - %4 = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %3, i32 2) - ret <2 x i64> %4 -} -declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone -declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone - -define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psllw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: psllw $2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psllw: -; ATOM: # %bb.0: -; ATOM-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psllw (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: psllw $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psllw: -; SLM: # %bb.0: -; SLM-NEXT: psllw %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: psllw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: psllw $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psllw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psllw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psllw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psllw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psllw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psllw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psllw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psllw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psllw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psllw: -; SKX: # %bb.0: -; SKX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psllw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psllw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psllw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psllw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psllw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psllw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %1, <8 x i16> %2) - %4 = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %3, i32 2) - ret <8 x i16> %4 -} -declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone -declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone - -define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_psrad: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: psrad $2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psrad: -; ATOM: # %bb.0: -; ATOM-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psrad (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: psrad $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psrad: -; SLM: # %bb.0: -; SLM-NEXT: psrad %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: psrad (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: psrad $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psrad: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psrad: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psrad: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psrad: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psrad: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrad: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psrad: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrad: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psrad: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrad: -; SKX: # %bb.0: -; SKX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psrad: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psrad: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psrad: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psrad: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psrad: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psrad: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> %2) - %4 = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2) - ret <4 x i32> %4 -} -declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone -declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone - -define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psraw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: psraw $2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psraw: -; ATOM: # %bb.0: -; ATOM-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psraw (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: psraw $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psraw: -; SLM: # %bb.0: -; SLM-NEXT: psraw %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: psraw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: psraw $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psraw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psraw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psraw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psraw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psraw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psraw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psraw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psraw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psraw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psraw: -; SKX: # %bb.0: -; SKX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psraw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psraw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psraw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psraw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psraw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psraw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> %2) - %4 = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2) - ret <8 x i16> %4 -} -declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone -declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone - -define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_psrld: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: psrld $2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psrld: -; ATOM: # %bb.0: -; ATOM-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psrld (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: psrld $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psrld: -; SLM: # %bb.0: -; SLM-NEXT: psrld %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: psrld (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: psrld $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psrld: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psrld: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psrld: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psrld: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psrld: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrld: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psrld: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrld: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psrld: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrld: -; SKX: # %bb.0: -; SKX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psrld: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psrld: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psrld: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psrld: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psrld: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psrld: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %1, <4 x i32> %2) - %4 = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %3, i32 2) - ret <4 x i32> %4 -} -declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone -declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone - -define <4 x i32> @test_psrldq(<4 x i32> %a0) { -; GENERIC-LABEL: test_psrldq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psrldq: -; ATOM: # %bb.0: -; ATOM-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psrldq: -; SLM: # %bb.0: -; SLM-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psrldq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psrldq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psrldq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psrldq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psrldq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrldq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psrldq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrldq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psrldq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrldq: -; SKX: # %bb.0: -; SKX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psrldq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psrldq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psrldq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psrldq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psrldq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psrldq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> - ret <4 x i32> %1 -} - -define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_psrlq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psrlq: -; ATOM: # %bb.0: -; ATOM-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psrlq (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psrlq: -; SLM: # %bb.0: -; SLM-NEXT: psrlq %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: psrlq (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psrlq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psrlq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psrlq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psrlq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psrlq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrlq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psrlq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrlq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psrlq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrlq: -; SKX: # %bb.0: -; SKX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psrlq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psrlq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psrlq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psrlq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psrlq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psrlq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %1, <2 x i64> %2) - %4 = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %3, i32 2) - ret <2 x i64> %4 -} -declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone -declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone - -define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psrlw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psrlw: -; ATOM: # %bb.0: -; ATOM-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psrlw (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psrlw: -; SLM: # %bb.0: -; SLM-NEXT: psrlw %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: psrlw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psrlw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psrlw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psrlw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psrlw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psrlw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrlw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psrlw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrlw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psrlw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrlw: -; SKX: # %bb.0: -; SKX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psrlw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psrlw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psrlw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psrlw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psrlw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psrlw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %1, <8 x i16> %2) - %4 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %3, i32 2) - ret <8 x i16> %4 -} -declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone -declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone - -define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_psubb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubb: -; ATOM: # %bb.0: -; ATOM-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psubb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubb: -; SLM: # %bb.0: -; SLM-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psubb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psubb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubb: -; SKX: # %bb.0: -; SKX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sub <16 x i8> %a0, %a1 - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = sub <16 x i8> %1, %2 - ret <16 x i8> %3 -} - -define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_psubd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubd: -; ATOM: # %bb.0: -; ATOM-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psubd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubd: -; SLM: # %bb.0: -; SLM-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psubd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psubd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubd: -; SKX: # %bb.0: -; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sub <4 x i32> %a0, %a1 - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = sub <4 x i32> %1, %2 - ret <4 x i32> %3 -} - -define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_psubq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubq: -; ATOM: # %bb.0: -; ATOM-NEXT: psubq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psubq (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubq: -; SLM: # %bb.0: -; SLM-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubq (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psubq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psubq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubq: -; SKX: # %bb.0: -; SKX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sub <2 x i64> %a0, %a1 - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = sub <2 x i64> %1, %2 - ret <2 x i64> %3 -} - -define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_psubsb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubsb: -; ATOM: # %bb.0: -; ATOM-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psubsb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubsb: -; SLM: # %bb.0: -; SLM-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubsb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psubsb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubsb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubsb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psubsb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubsb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubsb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubsb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubsb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubsb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubsb: -; SKX: # %bb.0: -; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubsb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubsb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubsb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubsb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubsb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubsb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone - -define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psubsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubsw: -; ATOM: # %bb.0: -; ATOM-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psubsw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubsw: -; SLM: # %bb.0: -; SLM-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubsw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psubsw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubsw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubsw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psubsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubsw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubsw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubsw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubsw: -; SKX: # %bb.0: -; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubsw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubsw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubsw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone - -define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_psubusb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubusb: -; ATOM: # %bb.0: -; ATOM-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psubusb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubusb: -; SLM: # %bb.0: -; SLM-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubusb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psubusb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubusb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubusb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psubusb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubusb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubusb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubusb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubusb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubusb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubusb: -; SKX: # %bb.0: -; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubusb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubusb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubusb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubusb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubusb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubusb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone - -define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psubusw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubusw: -; ATOM: # %bb.0: -; ATOM-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psubusw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubusw: -; SLM: # %bb.0: -; SLM-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubusw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psubusw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubusw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubusw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psubusw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubusw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubusw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubusw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubusw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubusw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubusw: -; SKX: # %bb.0: -; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubusw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubusw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubusw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubusw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubusw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubusw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone - -define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psubw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubw: -; ATOM: # %bb.0: -; ATOM-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psubw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubw: -; SLM: # %bb.0: -; SLM-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psubw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psubw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubw: -; SKX: # %bb.0: -; SKX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sub <8 x i16> %a0, %a1 - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = sub <8 x i16> %1, %2 - ret <8 x i16> %3 -} - -define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_punpckhbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] -; GENERIC-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpckhbw: -; ATOM: # %bb.0: -; ATOM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; ATOM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpckhbw: -; SLM: # %bb.0: -; SLM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; SLM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpckhbw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] -; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpckhbw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] -; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpckhbw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpckhbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpckhbw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckhbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpckhbw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckhbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpckhbw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckhbw: -; SKX: # %bb.0: -; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpckhbw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [2:0.50] -; BDVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpckhbw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [2:0.50] -; BDVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpckhbw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpckhbw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] -; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpckhbw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpckhbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25] -; ZNVER1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> - ret <16 x i8> %3 -} - -define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_punpckhdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; GENERIC-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpckhdq: -; ATOM: # %bb.0: -; ATOM-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; ATOM-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00] -; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpckhdq: -; SLM: # %bb.0: -; SLM-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SLM-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [4:1.00] -; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpckhdq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpckhdq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpckhdq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpckhdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpckhdq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckhdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpckhdq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckhdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpckhdq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckhdq: -; SKX: # %bb.0: -; SKX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpckhdq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] -; BDVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpckhdq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] -; BDVER2-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpckhdq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpckhdq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpckhdq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpckhdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] -; ZNVER1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> - %4 = add <4 x i32> %1, %3 - ret <4 x i32> %4 -} - -define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_punpckhqdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; GENERIC-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpckhqdq: -; ATOM: # %bb.0: -; ATOM-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; ATOM-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpckhqdq: -; SLM: # %bb.0: -; SLM-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SLM-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [4:1.00] -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpckhqdq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpckhqdq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpckhqdq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpckhqdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpckhqdq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckhqdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpckhqdq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckhqdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpckhqdq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckhqdq: -; SKX: # %bb.0: -; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpckhqdq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50] -; BDVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpckhqdq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50] -; BDVER2-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpckhqdq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpckhqdq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpckhqdq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpckhqdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25] -; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = shufflevector <2 x i64> %a1, <2 x i64> %2, <2x i32> - %4 = add <2 x i64> %1, %3 - ret <2 x i64> %4 -} - -define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_punpckhwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; GENERIC-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpckhwd: -; ATOM: # %bb.0: -; ATOM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; ATOM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpckhwd: -; SLM: # %bb.0: -; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpckhwd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpckhwd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpckhwd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpckhwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpckhwd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckhwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpckhwd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckhwd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpckhwd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckhwd: -; SKX: # %bb.0: -; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpckhwd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50] -; BDVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpckhwd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50] -; BDVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpckhwd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpckhwd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpckhwd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpckhwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] -; ZNVER1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> - ret <8 x i16> %3 -} - -define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_punpcklbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; GENERIC-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpcklbw: -; ATOM: # %bb.0: -; ATOM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; ATOM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpcklbw: -; SLM: # %bb.0: -; SLM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SLM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpcklbw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpcklbw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpcklbw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpcklbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpcklbw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpcklbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpcklbw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpcklbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpcklbw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpcklbw: -; SKX: # %bb.0: -; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpcklbw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50] -; BDVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpcklbw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50] -; BDVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpcklbw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpcklbw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpcklbw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpcklbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] -; ZNVER1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> - ret <16 x i8> %3 -} - -define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_punpckldq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpckldq: -; ATOM: # %bb.0: -; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00] -; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpckldq: -; SLM: # %bb.0: -; SLM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SLM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [4:1.00] -; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpckldq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpckldq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; SANDY-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpckldq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpckldq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpckldq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckldq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpckldq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckldq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpckldq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckldq: -; SKX: # %bb.0: -; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpckldq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50] -; BDVER2-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpckldq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50] -; BDVER2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpckldq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpckldq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpckldq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpckldq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25] -; ZNVER1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> - %4 = add <4 x i32> %1, %3 - ret <4 x i32> %4 -} - -define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_punpcklqdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpcklqdq: -; ATOM: # %bb.0: -; ATOM-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpcklqdq: -; SLM: # %bb.0: -; SLM-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SLM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00] -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpcklqdq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpcklqdq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpcklqdq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpcklqdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpcklqdq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpcklqdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpcklqdq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpcklqdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpcklqdq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpcklqdq: -; SKX: # %bb.0: -; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpcklqdq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50] -; BDVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpcklqdq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50] -; BDVER2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpcklqdq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpcklqdq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpcklqdq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpcklqdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25] -; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = shufflevector <2 x i64> %a1, <2 x i64> %2, <2x i32> - %4 = add <2 x i64> %1, %3 - ret <2 x i64> %4 -} - -define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_punpcklwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; GENERIC-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpcklwd: -; ATOM: # %bb.0: -; ATOM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; ATOM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpcklwd: -; SLM: # %bb.0: -; SLM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SLM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpcklwd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpcklwd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpcklwd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpcklwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpcklwd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpcklwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpcklwd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpcklwd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpcklwd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpcklwd: -; SKX: # %bb.0: -; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpcklwd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] -; BDVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpcklwd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] -; BDVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpcklwd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpcklwd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpcklwd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpcklwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] -; ZNVER1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> - ret <8 x i16> %3 -} - -define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_pxor: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pxor: -; ATOM: # %bb.0: -; ATOM-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: pxor (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pxor: -; SLM: # %bb.0: -; SLM-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pxor (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pxor: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pxor: -; SANDY: # %bb.0: -; SANDY-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pxor: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pxor: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pxor: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pxor: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pxor: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pxor: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pxor: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pxor: -; SKX: # %bb.0: -; SKX-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pxor: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pxor: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pxor: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pxor: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pxor: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pxor: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = xor <2 x i64> %a0, %a1 - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = xor <2 x i64> %1, %2 - %4 = add <2 x i64> %3, %a1 - ret <2 x i64> %4 -} - -define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_shufpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; GENERIC-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_shufpd: -; ATOM: # %bb.0: -; ATOM-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; ATOM-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_shufpd: -; SLM: # %bb.0: -; SLM-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SLM-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_shufpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_shufpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SANDY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_shufpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_shufpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; HASWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_shufpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_shufpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; BROADWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_shufpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_shufpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_shufpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SKX-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_shufpd: -; SKX: # %bb.0: -; SKX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_shufpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [2:0.50] -; BDVER2-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_shufpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [2:0.50] -; BDVER2-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_shufpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] -; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_shufpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] -; BTVER2-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_shufpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] -; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_shufpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] -; ZNVER1-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> - %4 = fadd <2 x double> %1, %3 - ret <2 x double> %4 -} - -define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_sqrtpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:21.00] -; GENERIC-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:21.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sqrtpd: -; ATOM: # %bb.0: -; ATOM-NEXT: sqrtpd %xmm0, %xmm1 # sched: [125:62.50] -; ATOM-NEXT: sqrtpd (%rdi), %xmm0 # sched: [125:62.50] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sqrtpd: -; SLM: # %bb.0: -; SLM-NEXT: sqrtpd (%rdi), %xmm1 # sched: [74:70.00] -; SLM-NEXT: sqrtpd %xmm0, %xmm0 # sched: [71:70.00] -; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_sqrtpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:21.00] -; SANDY-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:21.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_sqrtpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:21.00] -; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:21.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_sqrtpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [16:14.00] -; HASWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [22:14.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_sqrtpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [16:14.00] -; HASWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [22:14.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_sqrtpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [16:14.00] -; BROADWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [21:14.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sqrtpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [16:14.00] -; BROADWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [21:14.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_sqrtpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:6.00] -; SKYLAKE-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:6.00] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sqrtpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:6.00] -; SKYLAKE-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [24:6.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_sqrtpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:6.00] -; SKX-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:6.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sqrtpd: -; SKX: # %bb.0: -; SKX-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:6.00] -; SKX-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [24:6.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_sqrtpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [9:13.50] -; BDVER2-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [14:13.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_sqrtpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [14:13.50] -; BDVER2-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [9:13.50] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_sqrtpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [27:27.00] -; BTVER2-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [32:27.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_sqrtpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [32:27.00] -; BTVER2-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [27:27.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_sqrtpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:20.00] -; ZNVER1-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:20.00] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_sqrtpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:20.00] -; ZNVER1-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [20:20.00] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) - %2 = load <2 x double>, <2 x double> *%a1, align 16 - %3 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %2) - %4 = fadd <2 x double> %1, %3 - ret <2 x double> %4 -} -declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone - -; TODO - sqrtsd_m - -define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_sqrtsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: sqrtsd %xmm0, %xmm0 # sched: [21:21.00] -; GENERIC-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: sqrtsd %xmm1, %xmm1 # sched: [21:21.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sqrtsd: -; ATOM: # %bb.0: -; ATOM-NEXT: movapd (%rdi), %xmm1 # sched: [1:1.00] -; ATOM-NEXT: sqrtsd %xmm0, %xmm0 # sched: [62:31.00] -; ATOM-NEXT: sqrtsd %xmm1, %xmm1 # sched: [62:31.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sqrtsd: -; SLM: # %bb.0: -; SLM-NEXT: movapd (%rdi), %xmm1 # sched: [3:1.00] -; SLM-NEXT: sqrtsd %xmm0, %xmm0 # sched: [35:35.00] -; SLM-NEXT: sqrtsd %xmm1, %xmm1 # sched: [35:35.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_sqrtsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [21:21.00] -; SANDY-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] -; SANDY-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [21:21.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_sqrtsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00] -; SANDY-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] -; SANDY-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:21.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_sqrtsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [16:14.00] -; HASWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] -; HASWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [16:14.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_sqrtsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:14.00] -; HASWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] -; HASWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:14.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_sqrtsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [16:8.00] -; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [16:8.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sqrtsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:8.00] -; BROADWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:0.50] -; BROADWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:8.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_sqrtsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:6.00] -; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:6.00] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sqrtsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00] -; SKYLAKE-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] -; SKYLAKE-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_sqrtsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:6.00] -; SKX-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] -; SKX-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:6.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sqrtsd: -; SKX: # %bb.0: -; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00] -; SKX-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] -; SKX-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_sqrtsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:0.50] -; BDVER2-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [9:13.50] -; BDVER2-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [9:13.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_sqrtsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:0.50] -; BDVER2-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [9:13.50] -; BDVER2-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [9:13.50] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_sqrtsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:1.00] -; BTVER2-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [27:27.00] -; BTVER2-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [27:27.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_sqrtsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:1.00] -; BTVER2-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [27:27.00] -; BTVER2-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [27:27.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_sqrtsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:20.00] -; ZNVER1-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:20.00] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_sqrtsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovapd (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [20:20.00] -; ZNVER1-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [20:20.00] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) - %2 = load <2 x double>, <2 x double> *%a1, align 16 - %3 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2) - %4 = fadd <2 x double> %1, %3 - ret <2 x double> %4 -} -declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone - -define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_subpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_subpd: -; ATOM: # %bb.0: -; ATOM-NEXT: subpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: subpd (%rdi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_subpd: -; SLM: # %bb.0: -; SLM-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: subpd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_subpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_subpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_subpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_subpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_subpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_subpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_subpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_subpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_subpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_subpd: -; SKX: # %bb.0: -; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_subpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_subpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_subpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_subpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_subpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_subpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fsub <2 x double> %a0, %a1 - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = fsub <2 x double> %1, %2 - ret <2 x double> %3 -} - -define double @test_subsd(double %a0, double %a1, double *%a2) { -; GENERIC-LABEL: test_subsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_subsd: -; ATOM: # %bb.0: -; ATOM-NEXT: subsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: subsd (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_subsd: -; SLM: # %bb.0: -; SLM-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: subsd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_subsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_subsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_subsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_subsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_subsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_subsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_subsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_subsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_subsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_subsd: -; SKX: # %bb.0: -; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_subsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_subsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_subsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_subsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_subsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_subsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fsub double %a0, %a1 - %2 = load double, double *%a2, align 8 - %3 = fsub double %1, %2 - ret double %3 -} - -define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_ucomisd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: setnp %al # sched: [1:0.50] -; GENERIC-NEXT: sete %cl # sched: [1:0.50] -; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33] -; GENERIC-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: setnp %al # sched: [1:0.50] -; GENERIC-NEXT: sete %dl # sched: [1:0.50] -; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33] -; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33] -; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_ucomisd: -; ATOM: # %bb.0: -; ATOM-NEXT: ucomisd %xmm1, %xmm0 # sched: [9:4.50] -; ATOM-NEXT: setnp %al # sched: [1:0.50] -; ATOM-NEXT: sete %cl # sched: [1:0.50] -; ATOM-NEXT: andb %al, %cl # sched: [1:0.50] -; ATOM-NEXT: ucomisd (%rdi), %xmm0 # sched: [10:5.00] -; ATOM-NEXT: setnp %al # sched: [1:0.50] -; ATOM-NEXT: sete %dl # sched: [1:0.50] -; ATOM-NEXT: andb %al, %dl # sched: [1:0.50] -; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50] -; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_ucomisd: -; SLM: # %bb.0: -; SLM-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: setnp %al # sched: [1:0.50] -; SLM-NEXT: sete %cl # sched: [1:0.50] -; SLM-NEXT: andb %al, %cl # sched: [1:0.50] -; SLM-NEXT: ucomisd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: setnp %al # sched: [1:0.50] -; SLM-NEXT: sete %dl # sched: [1:0.50] -; SLM-NEXT: andb %al, %dl # sched: [1:0.50] -; SLM-NEXT: orb %cl, %dl # sched: [1:0.50] -; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_ucomisd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] -; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50] -; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] -; SANDY-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] -; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50] -; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33] -; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] -; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_ucomisd: -; SANDY: # %bb.0: -; SANDY-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.50] -; SANDY-NEXT: sete %cl # sched: [1:0.50] -; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] -; SANDY-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.50] -; SANDY-NEXT: sete %dl # sched: [1:0.50] -; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] -; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] -; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_ucomisd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50] -; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; HASWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50] -; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_ucomisd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %cl # sched: [1:0.50] -; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; HASWELL-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %dl # sched: [1:0.50] -; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] -; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] -; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_ucomisd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50] -; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50] -; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ucomisd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-NEXT: sete %cl # sched: [1:0.50] -; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-NEXT: sete %dl # sched: [1:0.50] -; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] -; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] -; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_ucomisd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_ucomisd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-NEXT: sete %cl # sched: [1:0.50] -; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKYLAKE-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-NEXT: sete %dl # sched: [1:0.50] -; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_ucomisd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKX-SSE-NEXT: sete %cl # sched: [1:0.50] -; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKX-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00] -; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKX-SSE-NEXT: sete %dl # sched: [1:0.50] -; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_ucomisd: -; SKX: # %bb.0: -; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] -; SKX-NEXT: setnp %al # sched: [1:0.50] -; SKX-NEXT: sete %cl # sched: [1:0.50] -; SKX-NEXT: andb %al, %cl # sched: [1:0.25] -; SKX-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00] -; SKX-NEXT: setnp %al # sched: [1:0.50] -; SKX-NEXT: sete %dl # sched: [1:0.50] -; SKX-NEXT: andb %al, %dl # sched: [1:0.25] -; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_ucomisd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_ucomisd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50] -; BDVER2-NEXT: vucomisd (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %dl # sched: [1:0.50] -; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.50] -; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_ucomisd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] -; BTVER2-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_ucomisd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-NEXT: sete %cl # sched: [1:0.50] -; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50] -; BTVER2-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-NEXT: sete %dl # sched: [1:0.50] -; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50] -; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50] -; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_ucomisd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_ucomisd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-NEXT: sete %cl # sched: [1:0.25] -; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25] -; ZNVER1-NEXT: vucomisd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-NEXT: sete %dl # sched: [1:0.25] -; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25] -; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25] -; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 8 - %3 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %2) - %4 = or i32 %1, %3 - ret i32 %4 -} -declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone - -define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_unpckhpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_unpckhpd: -; ATOM: # %bb.0: -; ATOM-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; ATOM-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_unpckhpd: -; SLM: # %bb.0: -; SLM-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SLM-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_unpckhpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_unpckhpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SANDY-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_unpckhpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_unpckhpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_unpckhpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_unpckhpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_unpckhpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_unpckhpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_unpckhpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_unpckhpd: -; SKX: # %bb.0: -; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_unpckhpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50] -; BDVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_unpckhpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50] -; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_unpckhpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_unpckhpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_unpckhpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_unpckhpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> - %4 = fadd <2 x double> %1, %3 - ret <2 x double> %4 -} - -define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_unpcklpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_unpcklpd: -; ATOM: # %bb.0: -; ATOM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [1:1.00] -; ATOM-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; ATOM-NEXT: addpd %xmm2, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_unpcklpd: -; SLM: # %bb.0: -; SLM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] -; SLM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [4:1.00] -; SLM-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; SLM-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_unpcklpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] -; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_unpcklpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] -; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_unpcklpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] -; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_unpcklpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] -; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_unpcklpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_unpcklpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] -; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_unpcklpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_unpcklpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_unpcklpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_unpcklpd: -; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_unpcklpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] -; BDVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:0.50] -; BDVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [2:0.50] -; BDVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_unpcklpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [2:0.50] -; BDVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_unpcklpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00] -; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:0.50] -; BTVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_unpcklpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:0.50] -; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_unpcklpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [8:0.50] -; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_unpcklpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:0.50] -; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = shufflevector <2 x double> %1, <2 x double> %2, <2 x i32> - %4 = fadd <2 x double> %1, %3 - ret <2 x double> %4 -} - -define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_xorpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xorpd: -; ATOM: # %bb.0: -; ATOM-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: xorpd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xorpd: -; SLM: # %bb.0: -; SLM-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: xorpd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_xorpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_xorpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_xorpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_xorpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_xorpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xorpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_xorpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xorpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_xorpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xorpd: -; SKX: # %bb.0: -; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_xorpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_xorpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_xorpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_xorpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_xorpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_xorpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <2 x double> %a0 to <4 x i32> - %2 = bitcast <2 x double> %a1 to <4 x i32> - %3 = xor <4 x i32> %1, %2 - %4 = load <2 x double>, <2 x double> *%a2, align 16 - %5 = bitcast <2 x double> %4 to <4 x i32> - %6 = xor <4 x i32> %3, %5 - %7 = bitcast <4 x i32> %6 to <2 x double> - %8 = fadd <2 x double> %a1, %7 - ret <2 x double> %8 -} - -!0 = !{i32 1} Index: test/CodeGen/X86/sse3-schedule.ll =================================================================== --- test/CodeGen/X86/sse3-schedule.ll +++ test/CodeGen/X86/sse3-schedule.ll @@ -1,1549 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 - -define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_addsubpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_addsubpd: -; ATOM: # %bb.0: -; ATOM-NEXT: addsubpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: addsubpd (%rdi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_addsubpd: -; SLM: # %bb.0: -; SLM-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: addsubpd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_addsubpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_addsubpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_addsubpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_addsubpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_addsubpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_addsubpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_addsubpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_addsubpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_addsubpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_addsubpd: -; SKX: # %bb.0: -; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_addsubpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_addsubpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_addsubpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_addsubpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_addsubpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_addsubpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %1, <2 x double> %2) - ret <2 x double> %3 -} -declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone - -define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_addsubps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_addsubps: -; ATOM: # %bb.0: -; ATOM-NEXT: addsubps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: addsubps (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_addsubps: -; SLM: # %bb.0: -; SLM-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: addsubps (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_addsubps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_addsubps: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_addsubps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_addsubps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_addsubps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_addsubps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_addsubps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_addsubps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_addsubps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_addsubps: -; SKX: # %bb.0: -; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_addsubps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_addsubps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_addsubps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_addsubps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_addsubps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_addsubps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %1, <4 x float> %2) - ret <4 x float> %3 -} -declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone - -define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_haddpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00] -; GENERIC-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_haddpd: -; ATOM: # %bb.0: -; ATOM-NEXT: haddpd %xmm1, %xmm0 # sched: [8:4.00] -; ATOM-NEXT: haddpd (%rdi), %xmm0 # sched: [9:4.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_haddpd: -; SLM: # %bb.0: -; SLM-NEXT: haddpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: haddpd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_haddpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00] -; SANDY-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_haddpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_haddpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00] -; HASWELL-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_haddpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_haddpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00] -; BROADWELL-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [10:2.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_haddpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_haddpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [6:2.00] -; SKYLAKE-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [12:2.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_haddpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_haddpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [6:2.00] -; SKX-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [12:2.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_haddpd: -; SKX: # %bb.0: -; SKX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; SKX-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_haddpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [16:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_haddpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_haddpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [4:1.00] -; BTVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [9:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_haddpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BTVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_haddpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_haddpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %1, <2 x double> %2) - ret <2 x double> %3 -} -declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone - -define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_haddps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00] -; GENERIC-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_haddps: -; ATOM: # %bb.0: -; ATOM-NEXT: haddps %xmm1, %xmm0 # sched: [8:4.00] -; ATOM-NEXT: haddps (%rdi), %xmm0 # sched: [9:4.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_haddps: -; SLM: # %bb.0: -; SLM-NEXT: haddps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: haddps (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_haddps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00] -; SANDY-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_haddps: -; SANDY: # %bb.0: -; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_haddps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00] -; HASWELL-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_haddps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_haddps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00] -; BROADWELL-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [10:2.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_haddps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_haddps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [6:2.00] -; SKYLAKE-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [12:2.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_haddps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_haddps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [6:2.00] -; SKX-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [12:2.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_haddps: -; SKX: # %bb.0: -; SKX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; SKX-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_haddps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [16:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_haddps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_haddps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [4:1.00] -; BTVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [9:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_haddps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BTVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_haddps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_haddps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, <4 x float> %2) - ret <4 x float> %3 -} -declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone - -define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_hsubpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00] -; GENERIC-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_hsubpd: -; ATOM: # %bb.0: -; ATOM-NEXT: hsubpd %xmm1, %xmm0 # sched: [8:4.00] -; ATOM-NEXT: hsubpd (%rdi), %xmm0 # sched: [9:4.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_hsubpd: -; SLM: # %bb.0: -; SLM-NEXT: hsubpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: hsubpd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_hsubpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00] -; SANDY-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_hsubpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_hsubpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00] -; HASWELL-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_hsubpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_hsubpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00] -; BROADWELL-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [10:2.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_hsubpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_hsubpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [6:2.00] -; SKYLAKE-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [12:2.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_hsubpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_hsubpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [6:2.00] -; SKX-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [12:2.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_hsubpd: -; SKX: # %bb.0: -; SKX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; SKX-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_hsubpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [16:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_hsubpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_hsubpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [4:1.00] -; BTVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [9:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_hsubpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BTVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_hsubpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_hsubpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %1, <2 x double> %2) - ret <2 x double> %3 -} -declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone - -define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_hsubps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00] -; GENERIC-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_hsubps: -; ATOM: # %bb.0: -; ATOM-NEXT: hsubps %xmm1, %xmm0 # sched: [8:4.00] -; ATOM-NEXT: hsubps (%rdi), %xmm0 # sched: [9:4.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_hsubps: -; SLM: # %bb.0: -; SLM-NEXT: hsubps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: hsubps (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_hsubps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00] -; SANDY-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_hsubps: -; SANDY: # %bb.0: -; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_hsubps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00] -; HASWELL-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_hsubps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_hsubps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00] -; BROADWELL-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [10:2.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_hsubps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; BROADWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_hsubps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [6:2.00] -; SKYLAKE-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [12:2.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_hsubps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; SKYLAKE-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_hsubps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [6:2.00] -; SKX-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [12:2.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_hsubps: -; SKX: # %bb.0: -; SKX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; SKX-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_hsubps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [16:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_hsubps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_hsubps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [4:1.00] -; BTVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [9:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_hsubps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BTVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_hsubps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_hsubps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %1, <4 x float> %2) - ret <4 x float> %3 -} -declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone - -define <16 x i8> @test_lddqu(i8* %a0) { -; GENERIC-LABEL: test_lddqu: -; GENERIC: # %bb.0: -; GENERIC-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lddqu: -; ATOM: # %bb.0: -; ATOM-NEXT: lddqu (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lddqu: -; SLM: # %bb.0: -; SLM-NEXT: lddqu (%rdi), %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_lddqu: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_lddqu: -; SANDY: # %bb.0: -; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_lddqu: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_lddqu: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_lddqu: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lddqu: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_lddqu: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lddqu: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_lddqu: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_lddqu: -; SKX: # %bb.0: -; SKX-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_lddqu: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_lddqu: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_lddqu: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_lddqu: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_lddqu: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_lddqu: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vlddqu (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) - ret <16 x i8> %1 -} -declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly - -define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) { -; GENERIC-LABEL: test_monitor: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; GENERIC-NEXT: monitor # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_monitor: -; ATOM: # %bb.0: -; ATOM-NEXT: movl %esi, %ecx # sched: [1:0.50] -; ATOM-NEXT: leaq (%rdi), %rax # sched: [1:1.00] -; ATOM-NEXT: monitor # sched: [45:22.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_monitor: -; SLM: # %bb.0: -; SLM-NEXT: movl %esi, %ecx # sched: [1:0.50] -; SLM-NEXT: leaq (%rdi), %rax # sched: [1:1.00] -; SLM-NEXT: monitor # sched: [100:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_monitor: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movl %esi, %ecx # sched: [1:0.33] -; SANDY-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; SANDY-SSE-NEXT: monitor # sched: [100:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_monitor: -; SANDY: # %bb.0: -; SANDY-NEXT: movl %esi, %ecx # sched: [1:0.33] -; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; SANDY-NEXT: monitor # sched: [100:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_monitor: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] -; HASWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; HASWELL-SSE-NEXT: monitor # sched: [100:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_monitor: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl %esi, %ecx # sched: [1:0.25] -; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; HASWELL-NEXT: monitor # sched: [100:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_monitor: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] -; BROADWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; BROADWELL-SSE-NEXT: monitor # sched: [100:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_monitor: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl %esi, %ecx # sched: [1:0.25] -; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; BROADWELL-NEXT: monitor # sched: [100:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_monitor: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: monitor # sched: [100:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_monitor: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl %esi, %ecx # sched: [1:0.25] -; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; SKYLAKE-NEXT: monitor # sched: [100:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_monitor: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] -; SKX-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; SKX-SSE-NEXT: monitor # sched: [100:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_monitor: -; SKX: # %bb.0: -; SKX-NEXT: movl %esi, %ecx # sched: [1:0.25] -; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; SKX-NEXT: monitor # sched: [100:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_monitor: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; BDVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50] -; BDVER2-SSE-NEXT: monitor # sched: [100:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_monitor: -; BDVER2: # %bb.0: -; BDVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; BDVER2-NEXT: movl %esi, %ecx # sched: [1:0.50] -; BDVER2-NEXT: monitor # sched: [100:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_monitor: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50] -; BTVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; BTVER2-SSE-NEXT: monitor # sched: [100:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_monitor: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50] -; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; BTVER2-NEXT: monitor # sched: [100:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_monitor: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] -; ZNVER1-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: monitor # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_monitor: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25] -; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] -; ZNVER1-NEXT: monitor # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2) - ret void -} -declare void @llvm.x86.sse3.monitor(i8*, i32, i32) - -define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_movddup: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] -; GENERIC-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50] -; GENERIC-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movddup: -; ATOM: # %bb.0: -; ATOM-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] -; ATOM-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [1:1.00] -; ATOM-NEXT: subpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movddup: -; SLM: # %bb.0: -; SLM-NEXT: movddup {{.*#+}} xmm1 = mem[0,0] sched: [4:1.00] -; SLM-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; SLM-NEXT: subpd %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movddup: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] -; SANDY-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50] -; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movddup: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50] -; SANDY-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movddup: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] -; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50] -; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movddup: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] -; HASWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movddup: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] -; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50] -; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movddup: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; BROADWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] -; BROADWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movddup: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50] -; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movddup: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] -; SKYLAKE-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movddup: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] -; SKX-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50] -; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movddup: -; SKX: # %bb.0: -; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] -; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movddup: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [2:0.50] -; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [7:0.50] -; BDVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movddup: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [7:0.50] -; BDVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [2:0.50] -; BDVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movddup: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50] -; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:1.00] -; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movddup: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00] -; BTVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50] -; BTVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movddup: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50] -; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [8:0.50] -; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movddup: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50] -; ZNVER1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50] -; ZNVER1-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer - %2 = load <2 x double>, <2 x double> *%a1, align 16 - %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer - %4 = fsub <2 x double> %3, %1 ; Use fsub to stop the movddup from being folded as a broadcast load in avx512vl. - ret <2 x double> %4 -} - -define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_movshdup: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] -; GENERIC-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movshdup: -; ATOM: # %bb.0: -; ATOM-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] -; ATOM-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [1:1.00] -; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movshdup: -; SLM: # %bb.0: -; SLM-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:1.00] -; SLM-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movshdup: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] -; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movshdup: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movshdup: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] -; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movshdup: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movshdup: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] -; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [5:0.50] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movshdup: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:0.50] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movshdup: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movshdup: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movshdup: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] -; SKX-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movshdup: -; SKX: # %bb.0: -; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movshdup: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [2:0.50] -; BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [7:0.50] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movshdup: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [7:0.50] -; BDVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [2:0.50] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movshdup: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50] -; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movshdup: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:1.00] -; BTVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movshdup: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50] -; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movshdup: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50] -; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> - %2 = load <4 x float>, <4 x float> *%a1, align 16 - %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} - -define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_movsldup: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] -; GENERIC-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movsldup: -; ATOM: # %bb.0: -; ATOM-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] -; ATOM-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [1:1.00] -; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movsldup: -; SLM: # %bb.0: -; SLM-NEXT: movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:1.00] -; SLM-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movsldup: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] -; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movsldup: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movsldup: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] -; HASWELL-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movsldup: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movsldup: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] -; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [5:0.50] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movsldup: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:0.50] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movsldup: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movsldup: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movsldup: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] -; SKX-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movsldup: -; SKX: # %bb.0: -; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; SKX-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movsldup: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [2:0.50] -; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [7:0.50] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movsldup: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [7:0.50] -; BDVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [2:0.50] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movsldup: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50] -; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movsldup: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:1.00] -; BTVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movsldup: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [100:0.25] -; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [100:0.25] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movsldup: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50] -; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> - %2 = load <4 x float>, <4 x float> *%a1, align 16 - %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} - -define void @test_mwait(i32 %a0, i32 %a1) { -; GENERIC-LABEL: test_mwait: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33] -; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: mwait # sched: [100:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_mwait: -; ATOM: # %bb.0: -; ATOM-NEXT: movl %esi, %eax # sched: [1:0.50] -; ATOM-NEXT: movl %edi, %ecx # sched: [1:0.50] -; ATOM-NEXT: mwait # sched: [46:23.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_mwait: -; SLM: # %bb.0: -; SLM-NEXT: movl %esi, %eax # sched: [1:0.50] -; SLM-NEXT: movl %edi, %ecx # sched: [1:0.50] -; SLM-NEXT: mwait # sched: [100:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_mwait: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movl %esi, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: movl %edi, %ecx # sched: [1:0.33] -; SANDY-SSE-NEXT: mwait # sched: [100:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_mwait: -; SANDY: # %bb.0: -; SANDY-NEXT: movl %esi, %eax # sched: [1:0.33] -; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33] -; SANDY-NEXT: mwait # sched: [100:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_mwait: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] -; HASWELL-SSE-NEXT: mwait # sched: [20:2.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_mwait: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl %esi, %eax # sched: [1:0.25] -; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] -; HASWELL-NEXT: mwait # sched: [20:2.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_mwait: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] -; BROADWELL-SSE-NEXT: mwait # sched: [100:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mwait: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl %esi, %eax # sched: [1:0.25] -; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] -; BROADWELL-NEXT: mwait # sched: [100:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_mwait: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: mwait # sched: [20:2.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mwait: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl %esi, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25] -; SKYLAKE-NEXT: mwait # sched: [20:2.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_mwait: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] -; SKX-SSE-NEXT: mwait # sched: [20:2.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_mwait: -; SKX: # %bb.0: -; SKX-NEXT: movl %esi, %eax # sched: [1:0.25] -; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25] -; SKX-NEXT: mwait # sched: [20:2.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_mwait: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.50] -; BDVER2-SSE-NEXT: mwait # sched: [100:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_mwait: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl %esi, %eax # sched: [1:0.50] -; BDVER2-NEXT: movl %edi, %ecx # sched: [1:0.50] -; BDVER2-NEXT: mwait # sched: [100:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_mwait: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.50] -; BTVER2-SSE-NEXT: mwait # sched: [100:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_mwait: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl %esi, %eax # sched: [1:0.50] -; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.50] -; BTVER2-NEXT: mwait # sched: [100:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_mwait: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] -; ZNVER1-SSE-NEXT: mwait # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_mwait: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25] -; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] -; ZNVER1-NEXT: mwait # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1) - ret void -} -declare void @llvm.x86.sse3.mwait(i32, i32) Index: test/CodeGen/X86/sse41-schedule.ll =================================================================== --- test/CodeGen/X86/sse41-schedule.ll +++ test/CodeGen/X86/sse41-schedule.ll @@ -1,6248 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKX-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2,-xop | FileCheck %s --check-prefixes=CHECK,BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 - -define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_blendpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50] -; GENERIC-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_blendpd: -; SLM: # %bb.0: -; SLM-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:1.00] -; SLM-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_blendpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50] -; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_blendpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50] -; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] -; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_blendpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33] -; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_blendpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33] -; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] -; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_blendpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33] -; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_blendpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33] -; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_blendpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33] -; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_blendpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33] -; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_blendpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33] -; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_blendpd: -; SKX: # %bb.0: -; SKX-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33] -; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_blendpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [2:0.50] -; BDVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_blendpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [2:0.50] -; BDVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_blendpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50] -; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_blendpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50] -; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_blendpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50] -; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_blendpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50] -; ZNVER1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = shufflevector <2 x double> %1, <2 x double> %2, <2 x i32> - %4 = fadd <2 x double> %1, %3 - ret <2 x double> %4 -} - -define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_blendps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] -; GENERIC-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_blendps: -; SLM: # %bb.0: -; SLM-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00] -; SLM-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [4:1.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_blendps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] -; SANDY-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_blendps: -; SANDY: # %bb.0: -; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] -; SANDY-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_blendps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] -; HASWELL-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_blendps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] -; HASWELL-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_blendps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] -; BROADWELL-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:0.50] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_blendps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] -; BROADWELL-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:0.50] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_blendps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] -; SKYLAKE-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_blendps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] -; SKYLAKE-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_blendps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] -; SKX-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_blendps: -; SKX: # %bb.0: -; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] -; SKX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_blendps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [2:0.50] -; BDVER2-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_blendps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [2:0.50] -; BDVER2-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_blendps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] -; BTVER2-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_blendps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] -; BTVER2-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_blendps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] -; ZNVER1-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_blendps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] -; ZNVER1-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [8:0.50] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} - -define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) { -; GENERIC-LABEL: test_blendvpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00] -; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00] -; GENERIC-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00] -; GENERIC-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_blendvpd: -; SLM: # %bb.0: -; SLM-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.50] -; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] -; SLM-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [1:1.00] -; SLM-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [4:1.00] -; SLM-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_blendvpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00] -; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00] -; SANDY-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00] -; SANDY-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_blendvpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_blendvpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00] -; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00] -; HASWELL-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:2.00] -; HASWELL-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_blendvpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_blendvpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00] -; BROADWELL-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00] -; BROADWELL-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_blendvpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_blendvpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67] -; SKYLAKE-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67] -; SKYLAKE-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_blendvpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] -; SKYLAKE-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_blendvpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.33] -; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67] -; SKX-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67] -; SKX-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_blendvpd: -; SKX: # %bb.0: -; SKX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] -; SKX-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_blendvpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.50] -; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00] -; BDVER2-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00] -; BDVER2-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_blendvpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; BDVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_blendvpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00] -; BTVER2-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00] -; BTVER2-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_blendvpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; BTVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_blendvpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [1:0.50] -; ZNVER1-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_blendvpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; ZNVER1-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) - %2 = load <2 x double>, <2 x double> *%a3, align 16 - %3 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %1, <2 x double> %2, <2 x double> %a2) - ret <2 x double> %3 -} -declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone - -define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) { -; GENERIC-LABEL: test_blendvps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] -; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00] -; GENERIC-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00] -; GENERIC-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_blendvps: -; SLM: # %bb.0: -; SLM-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50] -; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] -; SLM-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [1:1.00] -; SLM-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [4:1.00] -; SLM-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_blendvps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] -; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00] -; SANDY-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00] -; SANDY-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_blendvps: -; SANDY: # %bb.0: -; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_blendvps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] -; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00] -; HASWELL-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:2.00] -; HASWELL-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_blendvps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_blendvps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00] -; BROADWELL-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00] -; BROADWELL-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_blendvps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_blendvps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67] -; SKYLAKE-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67] -; SKYLAKE-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_blendvps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] -; SKYLAKE-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_blendvps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.33] -; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67] -; SKX-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67] -; SKX-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_blendvps: -; SKX: # %bb.0: -; SKX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] -; SKX-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_blendvps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50] -; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00] -; BDVER2-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00] -; BDVER2-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_blendvps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; BDVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_blendvps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00] -; BTVER2-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00] -; BTVER2-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_blendvps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; BTVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_blendvps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [1:0.50] -; ZNVER1-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_blendvps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; ZNVER1-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) - %2 = load <4 x float>, <4 x float> *%a3 - %3 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %1, <4 x float> %2, <4 x float> %a2) - ret <4 x float> %3 -} -declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone - -define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_dppd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_dppd: -; SLM: # %bb.0: -; SLM-NEXT: dppd $7, %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: dppd $7, (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_dppd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_dppd: -; SANDY: # %bb.0: -; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_dppd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_dppd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_dppd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] -; BROADWELL-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [14:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_dppd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; BROADWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_dppd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] -; SKYLAKE-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_dppd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; SKYLAKE-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_dppd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_dppd: -; SKX: # %bb.0: -; SKX-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; SKX-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_dppd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [15:1.50] -; BDVER2-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [20:1.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_dppd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [15:1.50] -; BDVER2-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [20:1.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_dppd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:3.00] -; BTVER2-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [14:3.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_dppd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:3.00] -; BTVER2-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:3.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_dppd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_dppd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %1, <2 x double> %2, i8 7) - ret <2 x double> %3 -} -declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone - -define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_dpps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00] -; GENERIC-NEXT: dpps $7, (%rdi), %xmm0 # sched: [18:2.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_dpps: -; SLM: # %bb.0: -; SLM-NEXT: dpps $7, %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: dpps $7, (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_dpps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00] -; SANDY-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [18:2.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_dpps: -; SANDY: # %bb.0: -; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00] -; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [18:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_dpps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [14:2.00] -; HASWELL-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [20:2.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_dpps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00] -; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [20:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_dpps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [14:2.00] -; BROADWELL-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:2.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_dpps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00] -; BROADWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_dpps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [13:1.50] -; SKYLAKE-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:1.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_dpps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.50] -; SKYLAKE-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_dpps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [13:1.33] -; SKX-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:1.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_dpps: -; SKX: # %bb.0: -; SKX-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33] -; SKX-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_dpps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [25:1.50] -; BDVER2-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [30:1.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_dpps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [25:1.50] -; BDVER2-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [30:1.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_dpps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [11:3.00] -; BTVER2-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [16:3.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_dpps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [11:3.00] -; BTVER2-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [16:3.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_dpps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_dpps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %1, <4 x float> %2, i8 7) - ret <4 x float> %3 -} -declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone - -define i32 @test_extractps(<4 x float> %a0, i32 *%a1) { -; GENERIC-LABEL: test_extractps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] -; GENERIC-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_extractps: -; SLM: # %bb.0: -; SLM-NEXT: extractps $3, %xmm0, %eax # sched: [1:1.00] -; SLM-NEXT: extractps $1, %xmm0, (%rdi) # sched: [4:2.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_extractps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] -; SANDY-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_extractps: -; SANDY: # %bb.0: -; SANDY-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00] -; SANDY-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_extractps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:1.00] -; HASWELL-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_extractps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00] -; HASWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_extractps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:1.00] -; BROADWELL-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_extractps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00] -; BROADWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_extractps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_extractps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_extractps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] -; SKX-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_extractps: -; SKX: # %bb.0: -; SKX-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00] -; SKX-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_extractps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [13:1.00] -; BDVER2-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [13:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_extractps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vextractps $3, %xmm0, %eax # sched: [13:1.00] -; BDVER2-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [13:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_extractps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] -; BTVER2-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_extractps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00] -; BTVER2-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_extractps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:2.00] -; ZNVER1-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:2.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_extractps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vextractps $3, %xmm0, %eax # sched: [2:2.00] -; ZNVER1-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:2.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = extractelement <4 x float> %a0, i32 3 - %2 = extractelement <4 x float> %a0, i32 1 - %3 = bitcast float %1 to i32 - %4 = bitcast float %2 to i32 - store i32 %4, i32 *%a1 - ret i32 %3 -} - -define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2) { -; GENERIC-LABEL: test_insertps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; GENERIC-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_insertps: -; SLM: # %bb.0: -; SLM-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; SLM-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_insertps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; SANDY-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_insertps: -; SANDY: # %bb.0: -; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_insertps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; HASWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_insertps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_insertps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; BROADWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_insertps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_insertps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_insertps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_insertps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; SKX-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_insertps: -; SKX: # %bb.0: -; SKX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_insertps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [2:0.50] -; BDVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_insertps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [2:0.50] -; BDVER2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_insertps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] -; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_insertps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] -; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_insertps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] -; ZNVER1-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_insertps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] -; ZNVER1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 17) - %2 = load float, float *%a2 - %3 = insertelement <4 x float> %1, float %2, i32 3 - ret <4 x float> %3 -} -declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone - -define <2 x i64> @test_movntdqa(i8* %a0) { -; GENERIC-LABEL: test_movntdqa: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_movntdqa: -; SLM: # %bb.0: -; SLM-NEXT: movntdqa (%rdi), %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movntdqa: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movntdqa: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movntdqa: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movntdqa: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movntdqa: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movntdqa: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movntdqa: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movntdqa: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movntdqa: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movntdqa: -; SKX: # %bb.0: -; SKX-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movntdqa: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movntdqa: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movntdqa: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movntdqa: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movntdqa: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movntdqa: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) - ret <2 x i64> %1 -} -declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readnone - -define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_mpsadbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_mpsadbw: -; SLM: # %bb.0: -; SLM-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00] -; SLM-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_mpsadbw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_mpsadbw: -; SANDY: # %bb.0: -; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_mpsadbw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00] -; HASWELL-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:2.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_mpsadbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00] -; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_mpsadbw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00] -; BROADWELL-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [12:2.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mpsadbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00] -; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_mpsadbw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00] -; SKYLAKE-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mpsadbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] -; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_mpsadbw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00] -; SKX-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_mpsadbw: -; SKX: # %bb.0: -; SKX-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] -; SKX-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_mpsadbw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [9:2.00] -; BDVER2-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [14:2.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_mpsadbw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [9:2.00] -; BDVER2-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [14:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_mpsadbw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [3:2.00] -; BTVER2-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [8:2.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_mpsadbw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BTVER2-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_mpsadbw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_mpsadbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) - %2 = bitcast <8 x i16> %1 to <16 x i8> - %3 = load <16 x i8>, <16 x i8> *%a2, align 16 - %4 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %2, <16 x i8> %3, i8 7) - ret <8 x i16> %4 -} -declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone - -define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_packusdw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_packusdw: -; SLM: # %bb.0: -; SLM-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: packusdw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_packusdw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_packusdw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_packusdw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_packusdw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_packusdw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_packusdw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_packusdw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_packusdw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_packusdw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] -; SKX-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_packusdw: -; SKX: # %bb.0: -; SKX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_packusdw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_packusdw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_packusdw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_packusdw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_packusdw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_packusdw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) - %2 = bitcast <8 x i16> %1 to <4 x i32> - %3 = load <4 x i32>, <4 x i32> *%a2, align 16 - %4 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %2, <4 x i32> %3) - ret <8 x i16> %4 -} -declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone - -define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 x i8> *%a3) { -; GENERIC-LABEL: test_pblendvb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] -; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00] -; GENERIC-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00] -; GENERIC-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pblendvb: -; SLM: # %bb.0: -; SLM-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.50] -; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [1:1.00] -; SLM-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [4:1.00] -; SLM-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pblendvb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] -; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00] -; SANDY-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00] -; SANDY-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pblendvb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pblendvb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] -; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00] -; HASWELL-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:2.00] -; HASWELL-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pblendvb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pblendvb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00] -; BROADWELL-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00] -; BROADWELL-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pblendvb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pblendvb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67] -; SKYLAKE-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67] -; SKYLAKE-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pblendvb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] -; SKYLAKE-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pblendvb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] -; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67] -; SKX-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67] -; SKX-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pblendvb: -; SKX: # %bb.0: -; SKX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] -; SKX-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pblendvb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00] -; BDVER2-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00] -; BDVER2-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pblendvb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; BDVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pblendvb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00] -; BTVER2-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00] -; BTVER2-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pblendvb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; BTVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pblendvb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pblendvb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) - %2 = load <16 x i8>, <16 x i8> *%a3, align 16 - %3 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %1, <16 x i8> %2, <16 x i8> %a2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone - -define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pblendw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] -; GENERIC-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50] -; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pblendw: -; SLM: # %bb.0: -; SLM-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] -; SLM-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [4:1.00] -; SLM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pblendw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] -; SANDY-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50] -; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pblendw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] -; SANDY-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50] -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pblendw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] -; HASWELL-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] -; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pblendw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] -; HASWELL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] -; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pblendw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] -; BROADWELL-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pblendw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] -; BROADWELL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00] -; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pblendw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pblendw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] -; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] -; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pblendw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] -; SKX-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] -; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pblendw: -; SKX: # %bb.0: -; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] -; SKX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pblendw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [2:0.50] -; BDVER2-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pblendw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [2:0.50] -; BDVER2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50] -; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pblendw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] -; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00] -; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pblendw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] -; BTVER2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00] -; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pblendw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33] -; ZNVER1-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pblendw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33] -; ZNVER1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [8:0.50] -; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = shufflevector <8 x i16> %a1, <8 x i16> %2, <8 x i32> - %4 = add <8 x i16> %1, %3 - ret <8 x i16> %4 -} - -define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_pcmpeqq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pcmpeqq: -; SLM: # %bb.0: -; SLM-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpeqq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpeqq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpeqq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpeqq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpeqq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpeqq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpeqq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpeqq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpeqq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpeqq: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpeqq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpeqq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpeqq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpeqq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpeqq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpeqq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp eq <2 x i64> %a0, %a1 - %2 = sext <2 x i1> %1 to <2 x i64> - %3 = load <2 x i64>, <2 x i64>*%a2, align 16 - %4 = icmp eq <2 x i64> %2, %3 - %5 = sext <2 x i1> %4 to <2 x i64> - ret <2 x i64> %5 -} - -define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) { -; GENERIC-LABEL: test_pextrb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] -; GENERIC-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pextrb: -; SLM: # %bb.0: -; SLM-NEXT: pextrb $3, %xmm0, %eax # sched: [1:1.00] -; SLM-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [4:2.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pextrb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] -; SANDY-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pextrb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] -; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pextrb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:1.00] -; HASWELL-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pextrb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00] -; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pextrb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:1.00] -; BROADWELL-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pextrb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00] -; BROADWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pextrb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pextrb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pextrb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] -; SKX-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pextrb: -; SKX: # %bb.0: -; SKX-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] -; SKX-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pextrb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [13:1.00] -; BDVER2-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [13:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pextrb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [13:1.00] -; BDVER2-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [13:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pextrb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] -; BTVER2-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pextrb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] -; BTVER2-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pextrb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:2.00] -; ZNVER1-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:3.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pextrb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:2.00] -; ZNVER1-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:3.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = extractelement <16 x i8> %a0, i32 3 - %2 = extractelement <16 x i8> %a0, i32 1 - store i8 %2, i8 *%a1 - %3 = zext i8 %1 to i32 - ret i32 %3 -} - -define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { -; GENERIC-LABEL: test_pextrd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] -; GENERIC-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pextrd: -; SLM: # %bb.0: -; SLM-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pextrd $3, %xmm0, %eax # sched: [1:1.00] -; SLM-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [4:2.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pextrd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] -; SANDY-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pextrd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] -; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pextrd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:1.00] -; HASWELL-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pextrd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00] -; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pextrd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:1.00] -; BROADWELL-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pextrd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00] -; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pextrd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pextrd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pextrd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] -; SKX-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pextrd: -; SKX: # %bb.0: -; SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] -; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pextrd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [13:1.00] -; BDVER2-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [13:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pextrd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [13:1.00] -; BDVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [13:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pextrd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] -; BTVER2-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pextrd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] -; BTVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pextrd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:2.00] -; ZNVER1-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:3.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pextrd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:2.00] -; ZNVER1-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:3.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = add <4 x i32> %a0, %a0 - %2 = extractelement <4 x i32> %1, i32 3 - %3 = extractelement <4 x i32> %1, i32 1 - store i32 %3, i32 *%a1 - ret i32 %2 -} - -define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) { -; GENERIC-LABEL: test_pextrq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] -; GENERIC-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pextrq: -; SLM: # %bb.0: -; SLM-NEXT: pextrq $1, %xmm0, %rax # sched: [1:1.00] -; SLM-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [4:2.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pextrq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] -; SANDY-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pextrq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] -; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pextrq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:1.00] -; HASWELL-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pextrq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00] -; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pextrq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:1.00] -; BROADWELL-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pextrq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00] -; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pextrq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pextrq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] -; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pextrq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] -; SKX-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pextrq: -; SKX: # %bb.0: -; SKX-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] -; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pextrq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [13:1.00] -; BDVER2-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [13:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pextrq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [13:1.00] -; BDVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [13:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pextrq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] -; BTVER2-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pextrq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] -; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pextrq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:2.00] -; ZNVER1-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:3.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pextrq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:2.00] -; ZNVER1-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:3.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = extractelement <2 x i64> %a0, i32 1 - %2 = extractelement <2 x i64> %a0, i32 1 - store i64 %2, i64 *%a2 - ret i64 %1 -} - -define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) { -; GENERIC-LABEL: test_pextrw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] -; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pextrw: -; SLM: # %bb.0: -; SLM-NEXT: pextrw $3, %xmm0, %eax # sched: [1:1.00] -; SLM-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [4:2.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pextrw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] -; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pextrw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] -; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pextrw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:1.00] -; HASWELL-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pextrw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00] -; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pextrw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:1.00] -; BROADWELL-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pextrw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00] -; BROADWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pextrw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pextrw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pextrw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] -; SKX-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pextrw: -; SKX: # %bb.0: -; SKX-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] -; SKX-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pextrw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [13:1.00] -; BDVER2-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [13:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pextrw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [13:1.00] -; BDVER2-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [13:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pextrw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] -; BTVER2-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pextrw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] -; BTVER2-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pextrw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:2.00] -; ZNVER1-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:3.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pextrw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:2.00] -; ZNVER1-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:3.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = extractelement <8 x i16> %a0, i32 3 - %2 = extractelement <8 x i16> %a0, i32 1 - store i16 %2, i16 *%a1 - %3 = zext i16 %1 to i32 - ret i32 %3 -} - -define <8 x i16> @test_phminposuw(<8 x i16> *%a0) { -; GENERIC-LABEL: test_phminposuw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_phminposuw: -; SLM: # %bb.0: -; SLM-NEXT: phminposuw (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_phminposuw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_phminposuw: -; SANDY: # %bb.0: -; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_phminposuw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_phminposuw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_phminposuw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phminposuw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_phminposuw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phminposuw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_phminposuw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:1.00] -; SKX-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phminposuw: -; SKX: # %bb.0: -; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00] -; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_phminposuw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_phminposuw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_phminposuw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_phminposuw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vphminposuw %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_phminposuw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_phminposuw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <8 x i16>, <8 x i16> *%a0, align 16 - %2 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %1) - %3 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone - -define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) { -; GENERIC-LABEL: test_pinsrb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pinsrb: -; SLM: # %bb.0: -; SLM-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:1.00] -; SLM-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pinsrb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pinsrb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pinsrb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] -; HASWELL-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pinsrb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pinsrb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] -; BROADWELL-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pinsrb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pinsrb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] -; SKYLAKE-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pinsrb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; SKYLAKE-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pinsrb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] -; SKX-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pinsrb: -; SKX: # %bb.0: -; SKX-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pinsrb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pinsrb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pinsrb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [7:0.50] -; BTVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [4:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pinsrb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [7:0.50] -; BTVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pinsrb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pinsrb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <16 x i8> %a0, i8 %a1, i32 1 - %2 = load i8, i8 *%a2 - %3 = insertelement <16 x i8> %1, i8 %2, i32 3 - ret <16 x i8> %3 -} - -define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) { -; GENERIC-LABEL: test_pinsrd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pinsrd: -; SLM: # %bb.0: -; SLM-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:1.00] -; SLM-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pinsrd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pinsrd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pinsrd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] -; HASWELL-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pinsrd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pinsrd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] -; BROADWELL-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pinsrd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pinsrd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] -; SKYLAKE-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pinsrd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; SKYLAKE-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pinsrd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] -; SKX-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pinsrd: -; SKX: # %bb.0: -; SKX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; SKX-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pinsrd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pinsrd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pinsrd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [7:0.50] -; BTVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [4:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pinsrd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [7:0.50] -; BTVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pinsrd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pinsrd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <4 x i32> %a0, i32 %a1, i32 1 - %2 = load i32, i32 *%a2 - %3 = insertelement <4 x i32> %1, i32 %2, i32 3 - ret <4 x i32> %3 -} - -define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { -; GENERIC-LABEL: test_pinsrq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pinsrq: -; SLM: # %bb.0: -; SLM-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [4:1.00] -; SLM-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:1.00] -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pinsrq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pinsrq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pinsrq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] -; HASWELL-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pinsrq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pinsrq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] -; BROADWELL-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pinsrq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pinsrq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] -; SKYLAKE-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pinsrq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] -; SKYLAKE-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pinsrq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] -; SKX-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pinsrq: -; SKX: # %bb.0: -; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] -; SKX-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pinsrq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:0.50] -; BDVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pinsrq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:0.50] -; BDVER2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pinsrq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [4:1.00] -; BTVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [7:0.50] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pinsrq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [4:1.00] -; BTVER2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [7:0.50] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pinsrq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pinsrq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <2 x i64> %a0, i64 %a2, i32 1 - %2 = load i64, i64 *%a3 - %3 = insertelement <2 x i64> %a1, i64 %2, i32 1 - %4 = add <2 x i64> %1, %3 - ret <2 x i64> %4 -} - -define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pmaxsb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmaxsb: -; SLM: # %bb.0: -; SLM-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pmaxsb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmaxsb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmaxsb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmaxsb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmaxsb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmaxsb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxsb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmaxsb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxsb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmaxsb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxsb: -; SKX: # %bb.0: -; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmaxsb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmaxsb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmaxsb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmaxsb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmaxsb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmaxsb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone - -define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_pmaxsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmaxsd: -; SLM: # %bb.0: -; SLM-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pmaxsd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmaxsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmaxsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmaxsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmaxsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmaxsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmaxsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmaxsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxsd: -; SKX: # %bb.0: -; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmaxsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmaxsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmaxsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmaxsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmaxsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmaxsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> %2) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone - -define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_pmaxud: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmaxud: -; SLM: # %bb.0: -; SLM-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pmaxud (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmaxud: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmaxud: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmaxud: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmaxud: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmaxud: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxud: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmaxud: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxud: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmaxud: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxud: -; SKX: # %bb.0: -; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmaxud: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmaxud: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmaxud: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmaxud: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmaxud: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmaxud: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %1, <4 x i32> %2) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone - -define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pmaxuw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmaxuw: -; SLM: # %bb.0: -; SLM-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pmaxuw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmaxuw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmaxuw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmaxuw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmaxuw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmaxuw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxuw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmaxuw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxuw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmaxuw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxuw: -; SKX: # %bb.0: -; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmaxuw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmaxuw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmaxuw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmaxuw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmaxuw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmaxuw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone - -define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pminsb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pminsb: -; SLM: # %bb.0: -; SLM-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pminsb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pminsb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pminsb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pminsb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pminsb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pminsb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminsb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pminsb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminsb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pminsb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminsb: -; SKX: # %bb.0: -; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pminsb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pminsb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pminsb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pminsb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pminsb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pminsb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone - -define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_pminsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pminsd: -; SLM: # %bb.0: -; SLM-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pminsd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pminsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pminsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pminsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pminsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pminsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pminsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pminsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminsd: -; SKX: # %bb.0: -; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pminsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pminsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pminsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pminsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pminsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pminsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %1, <4 x i32> %2) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone - -define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_pminud: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pminud: -; SLM: # %bb.0: -; SLM-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pminud (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pminud: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pminud: -; SANDY: # %bb.0: -; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pminud: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pminud: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pminud: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminud: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pminud: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminud: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pminud: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminud: -; SKX: # %bb.0: -; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pminud: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pminud: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pminud: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pminud: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pminud: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pminud: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %1, <4 x i32> %2) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone - -define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pminuw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pminuw: -; SLM: # %bb.0: -; SLM-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pminuw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pminuw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pminuw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pminuw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pminuw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pminuw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminuw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pminuw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminuw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pminuw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminuw: -; SKX: # %bb.0: -; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pminuw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pminuw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pminuw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pminuw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pminuw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pminuw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone - -define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { -; GENERIC-LABEL: test_pmovsxbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmovsxbw: -; SLM: # %bb.0: -; SLM-NEXT: pmovsxbw (%rdi), %xmm1 # sched: [4:1.00] -; SLM-NEXT: pmovsxbw %xmm0, %xmm0 # sched: [1:1.00] -; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmovsxbw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmovsxbw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50] -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmovsxbw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00] -; HASWELL-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00] -; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmovsxbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00] -; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmovsxbw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovsxbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00] -; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmovsxbw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovsxbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00] -; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmovsxbw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00] -; SKX-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00] -; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovsxbw: -; SKX: # %bb.0: -; SKX-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00] -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmovsxbw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmovsxbw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmovsxbw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmovsxbw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00] -; BTVER2-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmovsxbw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmovsxbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> - %2 = sext <8 x i8> %1 to <8 x i16> - %3 = load <8 x i8>, <8 x i8>* %a1, align 1 - %4 = sext <8 x i8> %3 to <8 x i16> - %5 = add <8 x i16> %2, %4 - ret <8 x i16> %5 -} - -define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { -; GENERIC-LABEL: test_pmovsxbd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmovsxbd: -; SLM: # %bb.0: -; SLM-NEXT: pmovsxbd (%rdi), %xmm1 # sched: [4:1.00] -; SLM-NEXT: pmovsxbd %xmm0, %xmm0 # sched: [1:1.00] -; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmovsxbd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmovsxbd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmovsxbd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00] -; HASWELL-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmovsxbd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmovsxbd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovsxbd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmovsxbd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovsxbd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmovsxbd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00] -; SKX-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovsxbd: -; SKX: # %bb.0: -; SKX-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmovsxbd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmovsxbd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmovsxbd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmovsxbd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00] -; BTVER2-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmovsxbd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmovsxbd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> - %2 = sext <4 x i8> %1 to <4 x i32> - %3 = load <4 x i8>, <4 x i8>* %a1, align 1 - %4 = sext <4 x i8> %3 to <4 x i32> - %5 = add <4 x i32> %2, %4 - ret <4 x i32> %5 -} - -define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { -; GENERIC-LABEL: test_pmovsxbq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmovsxbq: -; SLM: # %bb.0: -; SLM-NEXT: pmovsxbq (%rdi), %xmm1 # sched: [4:1.00] -; SLM-NEXT: pmovsxbq %xmm0, %xmm0 # sched: [1:1.00] -; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmovsxbq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmovsxbq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmovsxbq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00] -; HASWELL-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmovsxbq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmovsxbq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovsxbq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmovsxbq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovsxbq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmovsxbq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00] -; SKX-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovsxbq: -; SKX: # %bb.0: -; SKX-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmovsxbq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmovsxbq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmovsxbq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmovsxbq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00] -; BTVER2-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmovsxbq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmovsxbq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> - %2 = sext <2 x i8> %1 to <2 x i64> - %3 = load <2 x i8>, <2 x i8>* %a1, align 1 - %4 = sext <2 x i8> %3 to <2 x i64> - %5 = add <2 x i64> %2, %4 - ret <2 x i64> %5 -} - -define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { -; GENERIC-LABEL: test_pmovsxdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmovsxdq: -; SLM: # %bb.0: -; SLM-NEXT: pmovsxdq (%rdi), %xmm1 # sched: [4:1.00] -; SLM-NEXT: pmovsxdq %xmm0, %xmm0 # sched: [1:1.00] -; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmovsxdq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmovsxdq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmovsxdq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00] -; HASWELL-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmovsxdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmovsxdq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovsxdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmovsxdq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovsxdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmovsxdq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00] -; SKX-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovsxdq: -; SKX: # %bb.0: -; SKX-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmovsxdq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmovsxdq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmovsxdq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmovsxdq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00] -; BTVER2-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmovsxdq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmovsxdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> - %2 = sext <2 x i32> %1 to <2 x i64> - %3 = load <2 x i32>, <2 x i32>* %a1, align 1 - %4 = sext <2 x i32> %3 to <2 x i64> - %5 = add <2 x i64> %2, %4 - ret <2 x i64> %5 -} - -define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { -; GENERIC-LABEL: test_pmovsxwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmovsxwd: -; SLM: # %bb.0: -; SLM-NEXT: pmovsxwd (%rdi), %xmm1 # sched: [4:1.00] -; SLM-NEXT: pmovsxwd %xmm0, %xmm0 # sched: [1:1.00] -; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmovsxwd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmovsxwd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmovsxwd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] -; HASWELL-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmovsxwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmovsxwd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovsxwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmovsxwd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovsxwd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmovsxwd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] -; SKX-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovsxwd: -; SKX: # %bb.0: -; SKX-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmovsxwd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmovsxwd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmovsxwd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmovsxwd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] -; BTVER2-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmovsxwd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmovsxwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> - %2 = sext <4 x i16> %1 to <4 x i32> - %3 = load <4 x i16>, <4 x i16>* %a1, align 1 - %4 = sext <4 x i16> %3 to <4 x i32> - %5 = add <4 x i32> %2, %4 - ret <4 x i32> %5 -} - -define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { -; GENERIC-LABEL: test_pmovsxwq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmovsxwq: -; SLM: # %bb.0: -; SLM-NEXT: pmovsxwq (%rdi), %xmm1 # sched: [4:1.00] -; SLM-NEXT: pmovsxwq %xmm0, %xmm0 # sched: [1:1.00] -; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmovsxwq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmovsxwq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmovsxwq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] -; HASWELL-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmovsxwq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmovsxwq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovsxwq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmovsxwq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovsxwq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmovsxwq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] -; SKX-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovsxwq: -; SKX: # %bb.0: -; SKX-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmovsxwq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmovsxwq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmovsxwq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmovsxwq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00] -; BTVER2-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmovsxwq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmovsxwq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> - %2 = sext <2 x i16> %1 to <2 x i64> - %3 = load <2 x i16>, <2 x i16>* %a1, align 1 - %4 = sext <2 x i16> %3 to <2 x i64> - %5 = add <2 x i64> %2, %4 - ret <2 x i64> %5 -} - -define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { -; GENERIC-LABEL: test_pmovzxbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] -; GENERIC-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] -; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmovzxbw: -; SLM: # %bb.0: -; SLM-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [4:1.00] -; SLM-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmovzxbw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] -; SANDY-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] -; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmovzxbw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmovzxbw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; HASWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] -; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmovzxbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] -; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmovzxbw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; BROADWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovzxbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] -; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmovzxbw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] -; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovzxbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] -; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmovzxbw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; SKX-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] -; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovzxbw: -; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; SKX-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmovzxbw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [2:0.50] -; BDVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] -; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmovzxbw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] -; BDVER2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [2:0.50] -; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmovzxbw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] -; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] -; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmovzxbw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] -; BTVER2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] -; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmovzxbw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmovzxbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25] -; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> - %2 = zext <8 x i8> %1 to <8 x i16> - %3 = load <8 x i8>, <8 x i8>* %a1, align 1 - %4 = zext <8 x i8> %3 to <8 x i16> - %5 = add <8 x i16> %2, %4 - ret <8 x i16> %5 -} - -define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { -; GENERIC-LABEL: test_pmovzxbd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] -; GENERIC-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmovzxbd: -; SLM: # %bb.0: -; SLM-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [4:1.00] -; SLM-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmovzxbd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] -; SANDY-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmovzxbd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmovzxbd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; HASWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmovzxbd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmovzxbd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; BROADWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovzxbd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmovzxbd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovzxbd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmovzxbd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; SKX-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovzxbd: -; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; SKX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmovzxbd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [2:0.50] -; BDVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmovzxbd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] -; BDVER2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [2:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmovzxbd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] -; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmovzxbd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] -; BTVER2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmovzxbd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmovzxbd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> - %2 = zext <4 x i8> %1 to <4 x i32> - %3 = load <4 x i8>, <4 x i8>* %a1, align 1 - %4 = zext <4 x i8> %3 to <4 x i32> - %5 = add <4 x i32> %2, %4 - ret <4 x i32> %5 -} - -define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { -; GENERIC-LABEL: test_pmovzxbq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] -; GENERIC-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmovzxbq: -; SLM: # %bb.0: -; SLM-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [4:1.00] -; SLM-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmovzxbq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] -; SANDY-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmovzxbq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmovzxbq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; HASWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmovzxbq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmovzxbq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; BROADWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovzxbq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmovzxbq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovzxbq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmovzxbq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; SKX-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovzxbq: -; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; SKX-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmovzxbq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [2:0.50] -; BDVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmovzxbq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] -; BDVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [2:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmovzxbq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] -; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmovzxbq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] -; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmovzxbq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmovzxbq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.25] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> - %2 = zext <2 x i8> %1 to <2 x i64> - %3 = load <2 x i8>, <2 x i8>* %a1, align 1 - %4 = zext <2 x i8> %3 to <2 x i64> - %5 = add <2 x i64> %2, %4 - ret <2 x i64> %5 -} - -define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { -; GENERIC-LABEL: test_pmovzxdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] -; GENERIC-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmovzxdq: -; SLM: # %bb.0: -; SLM-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [4:1.00] -; SLM-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] -; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmovzxdq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] -; SANDY-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmovzxdq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmovzxdq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] -; HASWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmovzxdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmovzxdq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] -; BROADWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovzxdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] -; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmovzxdq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovzxdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] -; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmovzxdq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] -; SKX-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovzxdq: -; SKX: # %bb.0: -; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] -; SKX-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmovzxdq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [2:0.50] -; BDVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmovzxdq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50] -; BDVER2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [2:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmovzxdq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] -; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmovzxdq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00] -; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmovzxdq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmovzxdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.25] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> - %2 = zext <2 x i32> %1 to <2 x i64> - %3 = load <2 x i32>, <2 x i32>* %a1, align 1 - %4 = zext <2 x i32> %3 to <2 x i64> - %5 = add <2 x i64> %2, %4 - ret <2 x i64> %5 -} - -define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { -; GENERIC-LABEL: test_pmovzxwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] -; GENERIC-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmovzxwd: -; SLM: # %bb.0: -; SLM-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [4:1.00] -; SLM-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmovzxwd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] -; SANDY-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmovzxwd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmovzxwd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; HASWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmovzxwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmovzxwd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; BROADWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovzxwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmovzxwd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovzxwd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmovzxwd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; SKX-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovzxwd: -; SKX: # %bb.0: -; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; SKX-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmovzxwd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [2:0.50] -; BDVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmovzxwd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] -; BDVER2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [2:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmovzxwd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] -; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmovzxwd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] -; BTVER2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmovzxwd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmovzxwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.25] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> - %2 = zext <4 x i16> %1 to <4 x i32> - %3 = load <4 x i16>, <4 x i16>* %a1, align 1 - %4 = zext <4 x i16> %3 to <4 x i32> - %5 = add <4 x i32> %2, %4 - ret <4 x i32> %5 -} - -define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { -; GENERIC-LABEL: test_pmovzxwq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] -; GENERIC-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmovzxwq: -; SLM: # %bb.0: -; SLM-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [4:1.00] -; SLM-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] -; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmovzxwq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] -; SANDY-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmovzxwq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmovzxwq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] -; HASWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmovzxwq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmovzxwq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] -; BROADWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovzxwq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] -; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmovzxwq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovzxwq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] -; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmovzxwq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] -; SKX-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovzxwq: -; SKX: # %bb.0: -; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] -; SKX-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmovzxwq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [2:0.50] -; BDVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmovzxwq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] -; BDVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [2:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmovzxwq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] -; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmovzxwq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] -; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmovzxwq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmovzxwq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.25] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> - %2 = zext <2 x i16> %1 to <2 x i64> - %3 = load <2 x i16>, <2 x i16>* %a1, align 1 - %4 = zext <2 x i16> %3 to <2 x i64> - %5 = add <2 x i64> %2, %4 - ret <2 x i64> %5 -} - -define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> *%a3) { -; GENERIC-LABEL: test_pmuldq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00] -; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmuldq: -; SLM: # %bb.0: -; SLM-NEXT: pmuldq (%rdi), %xmm2 # sched: [7:1.00] -; SLM-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmuldq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00] -; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmuldq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [11:1.00] -; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmuldq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00] -; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmuldq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [11:1.00] -; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmuldq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmuldq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [10:1.00] -; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmuldq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmuldq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [10:0.50] -; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmuldq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [10:0.50] -; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmuldq: -; SKX: # %bb.0: -; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [10:0.50] -; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmuldq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [9:1.00] -; BDVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmuldq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmuldq (%rdi), %xmm2, %xmm2 # sched: [9:1.00] -; BDVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpor %xmm2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmuldq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [7:1.00] -; BTVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmuldq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmuldq (%rdi), %xmm2, %xmm2 # sched: [7:1.00] -; BTVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpor %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmuldq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmuldq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmuldq (%rdi), %xmm2, %xmm2 # sched: [11:1.00] -; ZNVER1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpor %xmm2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a3, align 16 - %3 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a2, <4 x i32> %2) - %4 = or <2 x i64> %1, %3 - ret <2 x i64> %4 -} -declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone - -define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_pmulld: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmulld %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pmulld: -; SLM: # %bb.0: -; SLM-NEXT: pmulld %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: pmulld (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmulld: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmulld: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmulld: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:2.00] -; HASWELL-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:2.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmulld: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00] -; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmulld: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:2.00] -; BROADWELL-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [15:2.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmulld: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00] -; BROADWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [15:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmulld: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:1.00] -; SKYLAKE-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmulld: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00] -; SKYLAKE-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmulld: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:1.00] -; SKX-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmulld: -; SKX: # %bb.0: -; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00] -; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmulld: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [5:2.00] -; BDVER2-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [10:2.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmulld: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; BDVER2-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmulld: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [4:2.00] -; BTVER2-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [9:2.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmulld: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [4:2.00] -; BTVER2-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmulld: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmulld: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = mul <4 x i32> %a0, %a1 - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = mul <4 x i32> %1, %2 - ret <4 x i32> %3 -} - -define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_ptest: -; GENERIC: # %bb.0: -; GENERIC-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: setb %al # sched: [1:0.50] -; GENERIC-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: setb %cl # sched: [1:0.50] -; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33] -; GENERIC-NEXT: movzbl %cl, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_ptest: -; SLM: # %bb.0: -; SLM-NEXT: ptest %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: setb %al # sched: [1:0.50] -; SLM-NEXT: ptest (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: setb %cl # sched: [1:0.50] -; SLM-NEXT: andb %al, %cl # sched: [1:0.50] -; SLM-NEXT: movzbl %cl, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_ptest: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: setb %al # sched: [1:0.50] -; SANDY-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: setb %cl # sched: [1:0.50] -; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] -; SANDY-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_ptest: -; SANDY: # %bb.0: -; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: setb %al # sched: [1:0.50] -; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-NEXT: setb %cl # sched: [1:0.50] -; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] -; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_ptest: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: setb %al # sched: [1:0.50] -; HASWELL-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: setb %cl # sched: [1:0.50] -; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; HASWELL-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_ptest: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: setb %al # sched: [1:0.50] -; HASWELL-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: setb %cl # sched: [1:0.50] -; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_ptest: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: setb %al # sched: [1:0.50] -; BROADWELL-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: setb %cl # sched: [1:0.50] -; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ptest: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: setb %al # sched: [1:0.50] -; BROADWELL-NEXT: vptest (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: setb %cl # sched: [1:0.50] -; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_ptest: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: setb %al # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [9:1.00] -; SKYLAKE-SSE-NEXT: setb %cl # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_ptest: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: setb %al # sched: [1:0.50] -; SKYLAKE-NEXT: vptest (%rdi), %xmm0 # sched: [9:1.00] -; SKYLAKE-NEXT: setb %cl # sched: [1:0.50] -; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKYLAKE-NEXT: movzbl %cl, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_ptest: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00] -; SKX-SSE-NEXT: setb %al # sched: [1:0.50] -; SKX-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: setb %cl # sched: [1:0.50] -; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKX-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_ptest: -; SKX: # %bb.0: -; SKX-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] -; SKX-NEXT: setb %al # sched: [1:0.50] -; SKX-NEXT: vptest (%rdi), %xmm0 # sched: [9:1.00] -; SKX-NEXT: setb %cl # sched: [1:0.50] -; SKX-NEXT: andb %al, %cl # sched: [1:0.25] -; SKX-NEXT: movzbl %cl, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_ptest: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: setb %al # sched: [1:0.50] -; BDVER2-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-SSE-NEXT: setb %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_ptest: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vptest %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: setb %al # sched: [1:0.50] -; BDVER2-NEXT: vptest (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-NEXT: setb %cl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50] -; BDVER2-NEXT: movzbl %cl, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_ptest: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: setb %al # sched: [1:0.50] -; BTVER2-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: setb %cl # sched: [1:0.50] -; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] -; BTVER2-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_ptest: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: setb %al # sched: [1:0.50] -; BTVER2-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: setb %cl # sched: [1:0.50] -; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50] -; BTVER2-NEXT: movzbl %cl, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_ptest: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: setb %al # sched: [1:0.25] -; ZNVER1-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: setb %cl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_ptest: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vptest %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: setb %al # sched: [1:0.25] -; ZNVER1-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: setb %cl # sched: [1:0.25] -; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25] -; ZNVER1-NEXT: movzbl %cl, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %2) - %4 = and i32 %1, %3 - ret i32 %4 -} -declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone - -define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_roundpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00] -; GENERIC-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_roundpd: -; SLM: # %bb.0: -; SLM-NEXT: roundpd $7, (%rdi), %xmm1 # sched: [6:1.00] -; SLM-NEXT: roundpd $7, %xmm0, %xmm0 # sched: [3:1.00] -; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_roundpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_roundpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_roundpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [6:0.50] -; HASWELL-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [12:2.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_roundpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50] -; HASWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [12:2.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_roundpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: roundpd $7, (%rdi), %xmm1 # sched: [11:2.00] -; BROADWELL-SSE-NEXT: roundpd $7, %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_roundpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:2.00] -; BROADWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_roundpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:1.00] -; SKYLAKE-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:1.00] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_roundpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00] -; SKYLAKE-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:1.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_roundpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:1.00] -; SKX-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_roundpd: -; SKX: # %bb.0: -; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00] -; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_roundpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_roundpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_roundpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_roundpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_roundpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_roundpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:1.00] -; ZNVER1-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) - %2 = load <2 x double>, <2 x double> *%a1, align 16 - %3 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %2, i32 7) - %4 = fadd <2 x double> %1, %3 - ret <2 x double> %4 -} -declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone - -define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_roundps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00] -; GENERIC-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_roundps: -; SLM: # %bb.0: -; SLM-NEXT: roundps $7, (%rdi), %xmm1 # sched: [6:1.00] -; SLM-NEXT: roundps $7, %xmm0, %xmm0 # sched: [3:1.00] -; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_roundps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_roundps: -; SANDY: # %bb.0: -; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_roundps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [6:0.50] -; HASWELL-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [12:2.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_roundps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:0.50] -; HASWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [12:2.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_roundps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: roundps $7, (%rdi), %xmm1 # sched: [11:2.00] -; BROADWELL-SSE-NEXT: roundps $7, %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_roundps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:2.00] -; BROADWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_roundps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:1.00] -; SKYLAKE-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:1.00] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_roundps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:1.00] -; SKYLAKE-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:1.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_roundps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:1.00] -; SKX-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_roundps: -; SKX: # %bb.0: -; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:1.00] -; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_roundps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_roundps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_roundps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_roundps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_roundps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_roundps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:1.00] -; ZNVER1-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) - %2 = load <4 x float>, <4 x float> *%a1, align 16 - %3 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %2, i32 7) - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} -declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone - -define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_roundsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00] -; GENERIC-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_roundsd: -; SLM: # %bb.0: -; SLM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] -; SLM-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00] -; SLM-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_roundsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] -; SANDY-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00] -; SANDY-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_roundsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_roundsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] -; HASWELL-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [6:0.50] -; HASWELL-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [12:2.00] -; HASWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_roundsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50] -; HASWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00] -; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_roundsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [11:2.00] -; BROADWELL-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_roundsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00] -; BROADWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_roundsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:1.00] -; SKYLAKE-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:1.00] -; SKYLAKE-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_roundsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_roundsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:1.00] -; SKX-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:1.00] -; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_roundsd: -; SKX: # %bb.0: -; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00] -; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_roundsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] -; BDVER2-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [4:1.00] -; BDVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_roundsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_roundsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00] -; BTVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_roundsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_roundsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_roundsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00] -; ZNVER1-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) - %2 = load <2 x double>, <2 x double>* %a2, align 16 - %3 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %2, i32 7) - %4 = fadd <2 x double> %1, %3 - ret <2 x double> %4 -} -declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone - -define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { -; GENERIC-LABEL: test_roundss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00] -; GENERIC-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_roundss: -; SLM: # %bb.0: -; SLM-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50] -; SLM-NEXT: roundss $7, (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00] -; SLM-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_roundss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00] -; SANDY-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00] -; SANDY-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_roundss: -; SANDY: # %bb.0: -; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_roundss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00] -; HASWELL-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [6:0.50] -; HASWELL-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [12:2.00] -; HASWELL-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_roundss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50] -; HASWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00] -; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_roundss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [11:2.00] -; BROADWELL-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_roundss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00] -; BROADWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vaddps %xmm2, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_roundss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:1.00] -; SKYLAKE-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:1.00] -; SKYLAKE-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_roundss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00] -; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_roundss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:1.00] -; SKX-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:1.00] -; SKX-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_roundss: -; SKX: # %bb.0: -; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00] -; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00] -; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_roundss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50] -; BDVER2-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [4:1.00] -; BDVER2-SSE-NEXT: addps %xmm2, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_roundss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_roundss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00] -; BTVER2-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_roundss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_roundss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_roundss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00] -; ZNVER1-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) - %2 = load <4 x float>, <4 x float> *%a2, align 16 - %3 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %2, i32 7) - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} -declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone Index: test/CodeGen/X86/sse42-schedule.ll =================================================================== --- test/CodeGen/X86/sse42-schedule.ll +++ test/CodeGen/X86/sse42-schedule.ll @@ -1,1631 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.2,+pclmul | FileCheck %s --check-prefixes=CHECK,GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SKX-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2,-xop | FileCheck %s --check-prefixes=CHECK,BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 - -define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { -; GENERIC-LABEL: crc32_32_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; GENERIC-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: crc32_32_8: -; SLM: # %bb.0: -; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] -; SLM-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; SLM-NEXT: crc32b (%rdx), %eax # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: crc32_32_8: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; SANDY-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: crc32_32_8: -; SANDY: # %bb.0: -; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: crc32_32_8: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; HASWELL-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: crc32_32_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] -; HASWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; HASWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: crc32_32_8: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; BROADWELL-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: crc32_32_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] -; BROADWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; BROADWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: crc32_32_8: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: crc32_32_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: crc32_32_8: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; SKX-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: crc32_32_8: -; SKX: # %bb.0: -; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: crc32_32_8: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:2.00] -; BDVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [7:2.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: crc32_32_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.50] -; BDVER2-NEXT: crc32b %sil, %eax # sched: [3:2.00] -; BDVER2-NEXT: crc32b (%rdx), %eax # sched: [7:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: crc32_32_8: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:2.00] -; BTVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [6:2.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: crc32_32_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50] -; BTVER2-NEXT: crc32b %sil, %eax # sched: [3:2.00] -; BTVER2-NEXT: crc32b (%rdx), %eax # sched: [6:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: crc32_32_8: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; ZNVER1-SSE-NEXT: crc32b (%rdx), %eax # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: crc32_32_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] -; ZNVER1-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; ZNVER1-NEXT: crc32b (%rdx), %eax # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1) - %2 = load i8, i8 *%a2 - %3 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %1, i8 %2) - ret i32 %3 -} -declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind - -define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { -; GENERIC-LABEL: crc32_32_16: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: crc32w %si, %eax # sched: [3:1.00] -; GENERIC-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: crc32_32_16: -; SLM: # %bb.0: -; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] -; SLM-NEXT: crc32w %si, %eax # sched: [3:1.00] -; SLM-NEXT: crc32w (%rdx), %eax # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: crc32_32_16: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00] -; SANDY-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: crc32_32_16: -; SANDY: # %bb.0: -; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: crc32w %si, %eax # sched: [3:1.00] -; SANDY-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: crc32_32_16: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00] -; HASWELL-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: crc32_32_16: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] -; HASWELL-NEXT: crc32w %si, %eax # sched: [3:1.00] -; HASWELL-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: crc32_32_16: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00] -; BROADWELL-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: crc32_32_16: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] -; BROADWELL-NEXT: crc32w %si, %eax # sched: [3:1.00] -; BROADWELL-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: crc32_32_16: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: crc32_32_16: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: crc32w %si, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: crc32_32_16: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00] -; SKX-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: crc32_32_16: -; SKX: # %bb.0: -; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-NEXT: crc32w %si, %eax # sched: [3:1.00] -; SKX-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: crc32_32_16: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: crc32w %si, %eax # sched: [5:2.00] -; BDVER2-SSE-NEXT: crc32w (%rdx), %eax # sched: [7:2.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: crc32_32_16: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.50] -; BDVER2-NEXT: crc32w %si, %eax # sched: [5:2.00] -; BDVER2-NEXT: crc32w (%rdx), %eax # sched: [7:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: crc32_32_16: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: crc32w %si, %eax # sched: [3:2.00] -; BTVER2-SSE-NEXT: crc32w (%rdx), %eax # sched: [6:2.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: crc32_32_16: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50] -; BTVER2-NEXT: crc32w %si, %eax # sched: [3:2.00] -; BTVER2-NEXT: crc32w (%rdx), %eax # sched: [6:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: crc32_32_16: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00] -; ZNVER1-SSE-NEXT: crc32w (%rdx), %eax # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: crc32_32_16: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] -; ZNVER1-NEXT: crc32w %si, %eax # sched: [3:1.00] -; ZNVER1-NEXT: crc32w (%rdx), %eax # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1) - %2 = load i16, i16 *%a2 - %3 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %1, i16 %2) - ret i32 %3 -} -declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind - -define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { -; GENERIC-LABEL: crc32_32_32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] -; GENERIC-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; GENERIC-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: crc32_32_32: -; SLM: # %bb.0: -; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] -; SLM-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; SLM-NEXT: crc32l (%rdx), %eax # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: crc32_32_32: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; SANDY-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: crc32_32_32: -; SANDY: # %bb.0: -; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; SANDY-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: crc32_32_32: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; HASWELL-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: crc32_32_32: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] -; HASWELL-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; HASWELL-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: crc32_32_32: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; BROADWELL-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: crc32_32_32: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] -; BROADWELL-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; BROADWELL-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: crc32_32_32: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: crc32_32_32: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: crc32_32_32: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; SKX-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: crc32_32_32: -; SKX: # %bb.0: -; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; SKX-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: crc32_32_32: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: crc32l %esi, %eax # sched: [6:2.00] -; BDVER2-SSE-NEXT: crc32l (%rdx), %eax # sched: [7:2.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: crc32_32_32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.50] -; BDVER2-NEXT: crc32l %esi, %eax # sched: [6:2.00] -; BDVER2-NEXT: crc32l (%rdx), %eax # sched: [7:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: crc32_32_32: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: crc32l %esi, %eax # sched: [3:2.00] -; BTVER2-SSE-NEXT: crc32l (%rdx), %eax # sched: [6:2.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: crc32_32_32: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50] -; BTVER2-NEXT: crc32l %esi, %eax # sched: [3:2.00] -; BTVER2-NEXT: crc32l (%rdx), %eax # sched: [6:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: crc32_32_32: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; ZNVER1-SSE-NEXT: crc32l (%rdx), %eax # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: crc32_32_32: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25] -; ZNVER1-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; ZNVER1-NEXT: crc32l (%rdx), %eax # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) - %2 = load i32, i32 *%a2 - %3 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %1, i32 %2) - ret i32 %3 -} -declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind - -define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { -; GENERIC-LABEL: crc32_64_8: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; GENERIC-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: crc32_64_8: -; SLM: # %bb.0: -; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] -; SLM-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; SLM-NEXT: crc32b (%rdx), %eax # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: crc32_64_8: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33] -; SANDY-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; SANDY-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: crc32_64_8: -; SANDY: # %bb.0: -; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] -; SANDY-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: crc32_64_8: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HASWELL-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; HASWELL-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: crc32_64_8: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HASWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; HASWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: crc32_64_8: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; BROADWELL-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: crc32_64_8: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] -; BROADWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; BROADWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: crc32_64_8: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: crc32_64_8: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: crc32_64_8: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] -; SKX-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; SKX-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: crc32_64_8: -; SKX: # %bb.0: -; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] -; SKX-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: crc32_64_8: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BDVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:2.00] -; BDVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [7:2.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: crc32_64_8: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BDVER2-NEXT: crc32b %sil, %eax # sched: [3:2.00] -; BDVER2-NEXT: crc32b (%rdx), %eax # sched: [7:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: crc32_64_8: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BTVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:2.00] -; BTVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [6:2.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: crc32_64_8: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BTVER2-NEXT: crc32b %sil, %eax # sched: [3:2.00] -; BTVER2-NEXT: crc32b (%rdx), %eax # sched: [6:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: crc32_64_8: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; ZNVER1-SSE-NEXT: crc32b (%rdx), %eax # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: crc32_64_8: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25] -; ZNVER1-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; ZNVER1-NEXT: crc32b (%rdx), %eax # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %a1) - %2 = load i8, i8 *%a2 - %3 = call i64 @llvm.x86.sse42.crc32.64.8(i64 %1, i8 %2) - ret i64 %3 -} -declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind - -define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { -; GENERIC-LABEL: crc32_64_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] -; GENERIC-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; GENERIC-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: crc32_64_64: -; SLM: # %bb.0: -; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] -; SLM-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; SLM-NEXT: crc32q (%rdx), %rax # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: crc32_64_64: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33] -; SANDY-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; SANDY-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: crc32_64_64: -; SANDY: # %bb.0: -; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] -; SANDY-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; SANDY-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: crc32_64_64: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HASWELL-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; HASWELL-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: crc32_64_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HASWELL-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; HASWELL-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: crc32_64_64: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; BROADWELL-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: crc32_64_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] -; BROADWELL-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; BROADWELL-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: crc32_64_64: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: crc32_64_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; SKYLAKE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: crc32_64_64: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] -; SKX-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; SKX-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: crc32_64_64: -; SKX: # %bb.0: -; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] -; SKX-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; SKX-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: crc32_64_64: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BDVER2-SSE-NEXT: crc32q %rsi, %rax # sched: [10:2.00] -; BDVER2-SSE-NEXT: crc32q (%rdx), %rax # sched: [7:2.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: crc32_64_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BDVER2-NEXT: crc32q %rsi, %rax # sched: [10:2.00] -; BDVER2-NEXT: crc32q (%rdx), %rax # sched: [7:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: crc32_64_64: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BTVER2-SSE-NEXT: crc32q %rsi, %rax # sched: [3:2.00] -; BTVER2-SSE-NEXT: crc32q (%rdx), %rax # sched: [6:2.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: crc32_64_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] -; BTVER2-NEXT: crc32q %rsi, %rax # sched: [3:2.00] -; BTVER2-NEXT: crc32q (%rdx), %rax # sched: [6:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: crc32_64_64: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; ZNVER1-SSE-NEXT: crc32q (%rdx), %rax # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: crc32_64_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25] -; ZNVER1-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; ZNVER1-NEXT: crc32q (%rdx), %rax # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1) - %2 = load i64, i64 *%a2 - %3 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %1, i64 %2) - ret i64 %3 -} -declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind - -define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pcmpestri: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33] -; GENERIC-NEXT: movl $7, %edx # sched: [1:0.33] -; GENERIC-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67] -; GENERIC-NEXT: movl %ecx, %esi # sched: [1:0.33] -; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33] -; GENERIC-NEXT: movl $7, %edx # sched: [1:0.33] -; GENERIC-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33] -; GENERIC-NEXT: # kill: def $ecx killed $ecx def $rcx -; GENERIC-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pcmpestri: -; SLM: # %bb.0: -; SLM-NEXT: movl $7, %eax # sched: [1:0.50] -; SLM-NEXT: movl $7, %edx # sched: [1:0.50] -; SLM-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [21:21.00] -; SLM-NEXT: movl $7, %eax # sched: [1:0.50] -; SLM-NEXT: movl $7, %edx # sched: [1:0.50] -; SLM-NEXT: movl %ecx, %esi # sched: [1:0.50] -; SLM-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [21:21.00] -; SLM-NEXT: # kill: def $ecx killed $ecx def $rcx -; SLM-NEXT: leal (%rcx,%rsi), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpestri: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33] -; SANDY-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67] -; SANDY-SSE-NEXT: movl %ecx, %esi # sched: [1:0.33] -; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33] -; SANDY-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33] -; SANDY-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; SANDY-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpestri: -; SANDY: # %bb.0: -; SANDY-NEXT: movl $7, %eax # sched: [1:0.33] -; SANDY-NEXT: movl $7, %edx # sched: [1:0.33] -; SANDY-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67] -; SANDY-NEXT: movl %ecx, %esi # sched: [1:0.33] -; SANDY-NEXT: movl $7, %eax # sched: [1:0.33] -; SANDY-NEXT: movl $7, %edx # sched: [1:0.33] -; SANDY-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33] -; SANDY-NEXT: # kill: def $ecx killed $ecx def $rcx -; SANDY-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpestri: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] -; HASWELL-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25] -; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00] -; HASWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; HASWELL-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpestri: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] -; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] -; HASWELL-NEXT: movl %ecx, %esi # sched: [1:0.25] -; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] -; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00] -; HASWELL-NEXT: # kill: def $ecx killed $ecx def $rcx -; HASWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpestri: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; BROADWELL-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] -; BROADWELL-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25] -; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; BROADWELL-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [23:4.00] -; BROADWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; BROADWELL-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpestri: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] -; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; BROADWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] -; BROADWELL-NEXT: movl %ecx, %esi # sched: [1:0.25] -; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] -; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; BROADWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [23:4.00] -; BROADWELL-NEXT: # kill: def $ecx killed $ecx def $rcx -; BROADWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpestri: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] -; SKYLAKE-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00] -; SKYLAKE-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; SKYLAKE-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpestri: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: movl $7, %edx # sched: [1:0.25] -; SKYLAKE-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] -; SKYLAKE-NEXT: movl %ecx, %esi # sched: [1:0.25] -; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: movl $7, %edx # sched: [1:0.25] -; SKYLAKE-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00] -; SKYLAKE-NEXT: # kill: def $ecx killed $ecx def $rcx -; SKYLAKE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpestri: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; SKX-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] -; SKX-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25] -; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; SKX-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00] -; SKX-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; SKX-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpestri: -; SKX: # %bb.0: -; SKX-NEXT: movl $7, %eax # sched: [1:0.25] -; SKX-NEXT: movl $7, %edx # sched: [1:0.25] -; SKX-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] -; SKX-NEXT: movl %ecx, %esi # sched: [1:0.25] -; SKX-NEXT: movl $7, %eax # sched: [1:0.25] -; SKX-NEXT: movl $7, %edx # sched: [1:0.25] -; SKX-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00] -; SKX-NEXT: # kill: def $ecx killed $ecx def $rcx -; SKX-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpestri: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] -; BDVER2-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [15:4.00] -; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] -; BDVER2-SSE-NEXT: movl %ecx, %esi # sched: [1:0.50] -; BDVER2-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [20:4.50] -; BDVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; BDVER2-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpestri: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl $7, %eax # sched: [1:0.50] -; BDVER2-NEXT: movl $7, %edx # sched: [1:0.50] -; BDVER2-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [15:4.00] -; BDVER2-NEXT: movl $7, %eax # sched: [1:0.50] -; BDVER2-NEXT: movl $7, %edx # sched: [1:0.50] -; BDVER2-NEXT: movl %ecx, %esi # sched: [1:0.50] -; BDVER2-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [20:4.50] -; BDVER2-NEXT: # kill: def $ecx killed $ecx def $rcx -; BDVER2-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpestri: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [14:5.00] -; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] -; BTVER2-SSE-NEXT: movl %ecx, %esi # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [19:5.00] -; BTVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; BTVER2-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpestri: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl $7, %eax # sched: [1:0.50] -; BTVER2-NEXT: movl $7, %edx # sched: [1:0.50] -; BTVER2-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [14:5.00] -; BTVER2-NEXT: movl $7, %eax # sched: [1:0.50] -; BTVER2-NEXT: movl $7, %edx # sched: [1:0.50] -; BTVER2-NEXT: movl %ecx, %esi # sched: [1:0.50] -; BTVER2-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [19:5.00] -; BTVER2-NEXT: # kill: def $ecx killed $ecx def $rcx -; BTVER2-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpestri: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; ZNVER1-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpestri: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25] -; ZNVER1-NEXT: movl $7, %edx # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25] -; ZNVER1-NEXT: movl $7, %edx # sched: [1:0.25] -; ZNVER1-NEXT: movl %ecx, %esi # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: # kill: def $ecx killed $ecx def $rcx -; ZNVER1-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %2, i32 7, i8 7) - %4 = add i32 %1, %3 - ret i32 %4 -} -declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone - -define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pcmpestrm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33] -; GENERIC-NEXT: movl $7, %edx # sched: [1:0.33] -; GENERIC-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67] -; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33] -; GENERIC-NEXT: movl $7, %edx # sched: [1:0.33] -; GENERIC-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pcmpestrm: -; SLM: # %bb.0: -; SLM-NEXT: movl $7, %eax # sched: [1:0.50] -; SLM-NEXT: movl $7, %edx # sched: [1:0.50] -; SLM-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [17:17.00] -; SLM-NEXT: movl $7, %eax # sched: [1:0.50] -; SLM-NEXT: movl $7, %edx # sched: [1:0.50] -; SLM-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [17:17.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpestrm: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33] -; SANDY-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67] -; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33] -; SANDY-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpestrm: -; SANDY: # %bb.0: -; SANDY-NEXT: movl $7, %eax # sched: [1:0.33] -; SANDY-NEXT: movl $7, %edx # sched: [1:0.33] -; SANDY-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67] -; SANDY-NEXT: movl $7, %eax # sched: [1:0.33] -; SANDY-NEXT: movl $7, %edx # sched: [1:0.33] -; SANDY-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpestrm: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] -; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpestrm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] -; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] -; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] -; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpestrm: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; BROADWELL-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] -; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; BROADWELL-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [24:4.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpestrm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] -; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; BROADWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] -; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] -; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; BROADWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [24:4.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpestrm: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] -; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpestrm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: movl $7, %edx # sched: [1:0.25] -; SKYLAKE-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] -; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: movl $7, %edx # sched: [1:0.25] -; SKYLAKE-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpestrm: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; SKX-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] -; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; SKX-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpestrm: -; SKX: # %bb.0: -; SKX-NEXT: movl $7, %eax # sched: [1:0.25] -; SKX-NEXT: movl $7, %edx # sched: [1:0.25] -; SKX-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] -; SKX-NEXT: movl $7, %eax # sched: [1:0.25] -; SKX-NEXT: movl $7, %edx # sched: [1:0.25] -; SKX-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpestrm: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] -; BDVER2-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [10:4.00] -; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] -; BDVER2-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [15:4.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpestrm: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl $7, %eax # sched: [1:0.50] -; BDVER2-NEXT: movl $7, %edx # sched: [1:0.50] -; BDVER2-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [10:4.00] -; BDVER2-NEXT: movl $7, %eax # sched: [1:0.50] -; BDVER2-NEXT: movl $7, %edx # sched: [1:0.50] -; BDVER2-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [15:4.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpestrm: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [14:5.00] -; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [19:5.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpestrm: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl $7, %eax # sched: [1:0.50] -; BTVER2-NEXT: movl $7, %edx # sched: [1:0.50] -; BTVER2-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [14:5.00] -; BTVER2-NEXT: movl $7, %eax # sched: [1:0.50] -; BTVER2-NEXT: movl $7, %edx # sched: [1:0.50] -; BTVER2-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:5.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpestrm: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpestrm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25] -; ZNVER1-NEXT: movl $7, %edx # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25] -; ZNVER1-NEXT: movl $7, %edx # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone - -define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pcmpistri: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] -; GENERIC-NEXT: movl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00] -; GENERIC-NEXT: # kill: def $ecx killed $ecx def $rcx -; GENERIC-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pcmpistri: -; SLM: # %bb.0: -; SLM-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [17:17.00] -; SLM-NEXT: movl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:17.00] -; SLM-NEXT: # kill: def $ecx killed $ecx def $rcx -; SLM-NEXT: leal (%rcx,%rax), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpistri: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] -; SANDY-SSE-NEXT: movl %ecx, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00] -; SANDY-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; SANDY-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpistri: -; SANDY: # %bb.0: -; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] -; SANDY-NEXT: movl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00] -; SANDY-NEXT: # kill: def $ecx killed $ecx def $rcx -; SANDY-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpistri: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] -; HASWELL-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00] -; HASWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; HASWELL-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpistri: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] -; HASWELL-NEXT: movl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00] -; HASWELL-NEXT: # kill: def $ecx killed $ecx def $rcx -; HASWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpistri: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] -; BROADWELL-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00] -; BROADWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; BROADWELL-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpistri: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] -; BROADWELL-NEXT: movl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00] -; BROADWELL-NEXT: # kill: def $ecx killed $ecx def $rcx -; BROADWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpistri: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00] -; SKYLAKE-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00] -; SKYLAKE-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; SKYLAKE-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpistri: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00] -; SKYLAKE-NEXT: movl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00] -; SKYLAKE-NEXT: # kill: def $ecx killed $ecx def $rcx -; SKYLAKE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpistri: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00] -; SKX-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00] -; SKX-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; SKX-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpistri: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00] -; SKX-NEXT: movl %ecx, %eax # sched: [1:0.25] -; SKX-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00] -; SKX-NEXT: # kill: def $ecx killed $ecx def $rcx -; SKX-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpistri: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [14:1.00] -; BDVER2-SSE-NEXT: movl %ecx, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [19:1.00] -; BDVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; BDVER2-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpistri: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [14:1.00] -; BDVER2-NEXT: movl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [19:1.00] -; BDVER2-NEXT: # kill: def $ecx killed $ecx def $rcx -; BDVER2-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpistri: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [7:2.00] -; BTVER2-SSE-NEXT: movl %ecx, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [12:2.00] -; BTVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; BTVER2-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpistri: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [7:2.00] -; BTVER2-NEXT: movl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [12:2.00] -; BTVER2-NEXT: # kill: def $ecx killed $ecx def $rcx -; BTVER2-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpistri: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx -; ZNVER1-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpistri: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: movl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: # kill: def $ecx killed $ecx def $rcx -; ZNVER1-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %2, i8 7) - %4 = add i32 %1, %3 - ret i32 %4 -} -declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone - -define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pcmpistrm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] -; GENERIC-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pcmpistrm: -; SLM: # %bb.0: -; SLM-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [13:13.00] -; SLM-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [13:13.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpistrm: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] -; SANDY-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpistrm: -; SANDY: # %bb.0: -; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] -; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpistrm: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] -; HASWELL-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpistrm: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] -; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpistrm: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] -; BROADWELL-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpistrm: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] -; BROADWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpistrm: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00] -; SKYLAKE-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpistrm: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00] -; SKYLAKE-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpistrm: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00] -; SKX-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpistrm: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00] -; SKX-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpistrm: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [6:1.00] -; BDVER2-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpistrm: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [6:1.00] -; BDVER2-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpistrm: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [8:2.00] -; BTVER2-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [13:2.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpistrm: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [8:2.00] -; BTVER2-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [13:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpistrm: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpistrm: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %1, <16 x i8> %2, i8 7) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone - -define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_pcmpgtq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pcmpgtq: -; SLM: # %bb.0: -; SLM-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpgtq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpgtq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpgtq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpgtq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpgtq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpgtq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpgtq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [9:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpgtq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpgtq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [3:1.00] -; SKX-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpgtq: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpgtq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpgtq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpgtq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpgtq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpgtq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [1:0.50] -; ZNVER1-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpgtq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; ZNVER1-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp sgt <2 x i64> %a0, %a1 - %2 = sext <2 x i1> %1 to <2 x i64> - %3 = load <2 x i64>, <2 x i64>*%a2, align 16 - %4 = icmp sgt <2 x i64> %2, %3 - %5 = sext <2 x i1> %4 to <2 x i64> - ret <2 x i64> %5 -} - -define <2 x i64> @test_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_pclmulqdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [14:6.00] -; GENERIC-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [14:5.67] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; SLM-LABEL: test_pclmulqdq: -; SLM: # %bb.0: -; SLM-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [10:10.00] -; SLM-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [10:10.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pclmulqdq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [14:6.00] -; SANDY-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [14:5.67] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pclmulqdq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [14:6.00] -; SANDY-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [14:5.67] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pclmulqdq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [11:2.00] -; HASWELL-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [17:2.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pclmulqdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [11:2.00] -; HASWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [17:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pclmulqdq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pclmulqdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pclmulqdq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [12:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pclmulqdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [6:1.00] -; SKYLAKE-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [12:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pclmulqdq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [6:1.00] -; SKX-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [12:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pclmulqdq: -; SKX: # %bb.0: -; SKX-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [6:1.00] -; SKX-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [12:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pclmulqdq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [12:1.00] -; BDVER2-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [17:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pclmulqdq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [13:1.00] -; BDVER2-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [17:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pclmulqdq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pclmulqdq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pclmulqdq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pclmulqdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <2 x i64>, <2 x i64> *%a2, align 16 - %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) - %3 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %2, i8 0) - ret <2 x i64> %3 -} -declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) Index: test/CodeGen/X86/sse4a-schedule.ll =================================================================== --- test/CodeGen/X86/sse4a-schedule.ll +++ test/CodeGen/X86/sse4a-schedule.ll @@ -1,156 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4a | FileCheck %s --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1 - -define <2 x i64> @test_extrq(<2 x i64> %a0, <16 x i8> %a1) { -; GENERIC-LABEL: test_extrq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_extrq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: extrq %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_extrq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_extrq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: extrq %xmm1, %xmm0 # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %a0, <16 x i8> %a1) - ret <2 x i64> %1 -} -declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) - -define <2 x i64> @test_extrqi(<2 x i64> %a0) { -; GENERIC-LABEL: test_extrqi: -; GENERIC: # %bb.0: -; GENERIC-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_extrqi: -; BDVER2: # %bb.0: -; BDVER2-NEXT: extrq $2, $3, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_extrqi: -; BTVER2: # %bb.0: -; BTVER2-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_extrqi: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: extrq $2, $3, %xmm0 # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a0, i8 3, i8 2) - ret <2 x i64> %1 -} -declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) - -define <2 x i64> @test_insertq(<2 x i64> %a0, <2 x i64> %a1) { -; GENERIC-LABEL: test_insertq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: insertq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_insertq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: insertq %xmm1, %xmm0 # sched: [3:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_insertq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: insertq %xmm1, %xmm0 # sched: [2:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_insertq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: insertq %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %a0, <2 x i64> %a1) - ret <2 x i64> %1 -} -declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_insertqi(<2 x i64> %a0, <2 x i64> %a1) { -; GENERIC-LABEL: test_insertqi: -; GENERIC: # %bb.0: -; GENERIC-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_insertqi: -; BDVER2: # %bb.0: -; BDVER2-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [3:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_insertqi: -; BTVER2: # %bb.0: -; BTVER2-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [2:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_insertqi: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a0, <2 x i64> %a1, i8 5, i8 6) - ret <2 x i64> %1 -} -declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) - -define void @test_movntsd(i8* %p, <2 x double> %a) { -; GENERIC-LABEL: test_movntsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movntsd %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_movntsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movntsd %xmm0, (%rdi) # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movntsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movntsd %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movntsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movntsd %xmm0, (%rdi) # sched: [8:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void @llvm.x86.sse4a.movnt.sd(i8* %p, <2 x double> %a) - ret void -} -declare void @llvm.x86.sse4a.movnt.sd(i8*, <2 x double>) - -define void @test_movntss(i8* %p, <4 x float> %a) { -; GENERIC-LABEL: test_movntss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movntss %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_movntss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movntss %xmm0, (%rdi) # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-LABEL: test_movntss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movntss %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_movntss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movntss %xmm0, (%rdi) # sched: [8:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void @llvm.x86.sse4a.movnt.ss(i8* %p, <4 x float> %a) - ret void -} -declare void @llvm.x86.sse4a.movnt.ss(i8*, <4 x float>) - Index: test/CodeGen/X86/ssse3-schedule.ll =================================================================== --- test/CodeGen/X86/ssse3-schedule.ll +++ test/CodeGen/X86/ssse3-schedule.ll @@ -1,2049 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefixes=CHECK,ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKX-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 - -define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { -; GENERIC-LABEL: test_pabsb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pabsb: -; ATOM: # %bb.0: -; ATOM-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: pabsb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pabsb: -; SLM: # %bb.0: -; SLM-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: pabsb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pabsb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pabsb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] -; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pabsb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pabsb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] -; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pabsb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pabsb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [6:0.50] -; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pabsb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pabsb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] -; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pabsb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] -; SKX-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pabsb: -; SKX: # %bb.0: -; SKX-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] -; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pabsb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pabsb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpabsb %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pabsb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pabsb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpabsb (%rdi), %xmm1 # sched: [6:1.00] -; BTVER2-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pabsb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pabsb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpabsb (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) - %2 = load <16 x i8>, <16 x i8> *%a1, align 16 - %3 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %2) - %4 = or <16 x i8> %1, %3 - ret <16 x i8> %4 -} -declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone - -define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) { -; GENERIC-LABEL: test_pabsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pabsd: -; ATOM: # %bb.0: -; ATOM-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: pabsd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pabsd: -; SLM: # %bb.0: -; SLM-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: pabsd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pabsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pabsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] -; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pabsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pabsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] -; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pabsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pabsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [6:0.50] -; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pabsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pabsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] -; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pabsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] -; SKX-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pabsd: -; SKX: # %bb.0: -; SKX-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] -; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pabsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pabsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpabsd %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pabsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pabsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpabsd (%rdi), %xmm1 # sched: [6:1.00] -; BTVER2-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pabsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pabsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpabsd (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) - %2 = load <4 x i32>, <4 x i32> *%a1, align 16 - %3 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %2) - %4 = or <4 x i32> %1, %3 - ret <4 x i32> %4 -} -declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone - -define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { -; GENERIC-LABEL: test_pabsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pabsw: -; ATOM: # %bb.0: -; ATOM-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: pabsw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pabsw: -; SLM: # %bb.0: -; SLM-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: pabsw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pabsw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pabsw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] -; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pabsw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pabsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] -; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pabsw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pabsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpabsw (%rdi), %xmm1 # sched: [6:0.50] -; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pabsw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pabsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] -; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pabsw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] -; SKX-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pabsw: -; SKX: # %bb.0: -; SKX-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] -; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pabsw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pabsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpabsw %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pabsw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pabsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpabsw (%rdi), %xmm1 # sched: [6:1.00] -; BTVER2-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pabsw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pabsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpabsw (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) - %2 = load <8 x i16>, <8 x i16> *%a1, align 16 - %3 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %2) - %4 = or <8 x i16> %1, %3 - ret <8 x i16> %4 -} -declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone - -define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_palignr: -; GENERIC: # %bb.0: -; GENERIC-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] -; GENERIC-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] -; GENERIC-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_palignr: -; ATOM: # %bb.0: -; ATOM-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] -; ATOM-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_palignr: -; SLM: # %bb.0: -; SLM-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] -; SLM-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [4:1.00] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_palignr: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] -; SANDY-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] -; SANDY-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_palignr: -; SANDY: # %bb.0: -; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] -; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_palignr: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] -; HASWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] -; HASWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_palignr: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] -; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_palignr: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] -; BROADWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] -; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_palignr: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] -; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_palignr: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_palignr: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] -; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_palignr: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] -; SKX-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] -; SKX-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_palignr: -; SKX: # %bb.0: -; SKX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] -; SKX-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_palignr: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [2:0.50] -; BDVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] -; BDVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_palignr: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [2:0.50] -; BDVER2-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_palignr: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] -; BTVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] -; BTVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_palignr: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] -; BTVER2-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_palignr: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25] -; ZNVER1-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [8:0.50] -; ZNVER1-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_palignr: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25] -; ZNVER1-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = shufflevector <8 x i16> %2, <8 x i16> %1, <8 x i32> - ret <8 x i16> %3 -} - -define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_phaddd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: phaddd (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_phaddd: -; ATOM: # %bb.0: -; ATOM-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50] -; ATOM-NEXT: phaddd (%rdi), %xmm0 # sched: [4:2.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_phaddd: -; SLM: # %bb.0: -; SLM-NEXT: phaddd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: phaddd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_phaddd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50] -; SANDY-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:1.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_phaddd: -; SANDY: # %bb.0: -; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_phaddd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] -; HASWELL-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:2.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_phaddd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_phaddd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] -; BROADWELL-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [8:2.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phaddd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_phaddd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] -; SKYLAKE-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:2.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phaddd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_phaddd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] -; SKX-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:2.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phaddd: -; SKX: # %bb.0: -; SKX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_phaddd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [10:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_phaddd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_phaddd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_phaddd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_phaddd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_phaddd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %1, <4 x i32> %2) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone - -define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_phaddsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_phaddsw: -; ATOM: # %bb.0: -; ATOM-NEXT: phaddsw %xmm1, %xmm0 # sched: [7:3.50] -; ATOM-NEXT: phaddsw (%rdi), %xmm0 # sched: [8:4.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_phaddsw: -; SLM: # %bb.0: -; SLM-NEXT: phaddsw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: phaddsw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_phaddsw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:1.50] -; SANDY-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:1.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_phaddsw: -; SANDY: # %bb.0: -; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_phaddsw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00] -; HASWELL-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:2.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_phaddsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_phaddsw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00] -; BROADWELL-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [8:2.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phaddsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_phaddsw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00] -; SKYLAKE-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:2.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phaddsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_phaddsw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00] -; SKX-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:2.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phaddsw: -; SKX: # %bb.0: -; SKX-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_phaddsw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [10:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_phaddsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_phaddsw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_phaddsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_phaddsw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_phaddsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone - -define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_phaddw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: phaddw %xmm1, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: phaddw (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_phaddw: -; ATOM: # %bb.0: -; ATOM-NEXT: phaddw %xmm1, %xmm0 # sched: [7:3.50] -; ATOM-NEXT: phaddw (%rdi), %xmm0 # sched: [8:4.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_phaddw: -; SLM: # %bb.0: -; SLM-NEXT: phaddw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: phaddw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_phaddw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:1.50] -; SANDY-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:1.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_phaddw: -; SANDY: # %bb.0: -; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_phaddw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00] -; HASWELL-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:2.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_phaddw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_phaddw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00] -; BROADWELL-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [8:2.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phaddw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_phaddw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00] -; SKYLAKE-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:2.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phaddw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_phaddw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00] -; SKX-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:2.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phaddw: -; SKX: # %bb.0: -; SKX-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_phaddw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [10:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_phaddw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_phaddw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_phaddw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_phaddw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_phaddw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone - -define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_phsubd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: phsubd %xmm1, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: phsubd (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_phsubd: -; ATOM: # %bb.0: -; ATOM-NEXT: phsubd %xmm1, %xmm0 # sched: [3:1.50] -; ATOM-NEXT: phsubd (%rdi), %xmm0 # sched: [4:2.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_phsubd: -; SLM: # %bb.0: -; SLM-NEXT: phsubd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: phsubd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_phsubd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:1.50] -; SANDY-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:1.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_phsubd: -; SANDY: # %bb.0: -; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_phsubd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00] -; HASWELL-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:2.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_phsubd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_phsubd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00] -; BROADWELL-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [8:2.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phsubd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_phsubd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00] -; SKYLAKE-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:2.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phsubd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_phsubd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00] -; SKX-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:2.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phsubd: -; SKX: # %bb.0: -; SKX-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_phsubd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [10:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_phsubd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_phsubd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_phsubd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_phsubd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_phsubd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %1, <4 x i32> %2) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone - -define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_phsubsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_phsubsw: -; ATOM: # %bb.0: -; ATOM-NEXT: phsubsw %xmm1, %xmm0 # sched: [7:3.50] -; ATOM-NEXT: phsubsw (%rdi), %xmm0 # sched: [8:4.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_phsubsw: -; SLM: # %bb.0: -; SLM-NEXT: phsubsw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: phsubsw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_phsubsw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:1.50] -; SANDY-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:1.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_phsubsw: -; SANDY: # %bb.0: -; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_phsubsw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00] -; HASWELL-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:2.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_phsubsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_phsubsw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00] -; BROADWELL-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [8:2.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phsubsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_phsubsw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00] -; SKYLAKE-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:2.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phsubsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_phsubsw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00] -; SKX-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:2.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phsubsw: -; SKX: # %bb.0: -; SKX-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_phsubsw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [10:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_phsubsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_phsubsw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_phsubsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_phsubsw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_phsubsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone - -define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_phsubw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: phsubw %xmm1, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: phsubw (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_phsubw: -; ATOM: # %bb.0: -; ATOM-NEXT: phsubw %xmm1, %xmm0 # sched: [7:3.50] -; ATOM-NEXT: phsubw (%rdi), %xmm0 # sched: [8:4.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_phsubw: -; SLM: # %bb.0: -; SLM-NEXT: phsubw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: phsubw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_phsubw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:1.50] -; SANDY-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:1.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_phsubw: -; SANDY: # %bb.0: -; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_phsubw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00] -; HASWELL-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:2.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_phsubw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_phsubw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00] -; BROADWELL-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [8:2.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_phsubw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BROADWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_phsubw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00] -; SKYLAKE-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:2.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_phsubw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKYLAKE-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_phsubw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00] -; SKX-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:2.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_phsubw: -; SKX: # %bb.0: -; SKX-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_phsubw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [10:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_phsubw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_phsubw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_phsubw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_phsubw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_phsubw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone - -define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pmaddubsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmaddubsw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmaddubsw: -; SLM: # %bb.0: -; SLM-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmaddubsw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmaddubsw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmaddubsw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmaddubsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmaddubsw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaddubsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmaddubsw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaddubsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmaddubsw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaddubsw: -; SKX: # %bb.0: -; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmaddubsw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmaddubsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmaddubsw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmaddubsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmaddubsw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmaddubsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = bitcast <8 x i16> %1 to <16 x i8> - %4 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %3, <16 x i8> %2) - ret <8 x i16> %4 -} -declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone - -define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pmulhrsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmulhrsw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmulhrsw: -; SLM: # %bb.0: -; SLM-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmulhrsw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmulhrsw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmulhrsw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmulhrsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmulhrsw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmulhrsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmulhrsw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmulhrsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmulhrsw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmulhrsw: -; SKX: # %bb.0: -; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmulhrsw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmulhrsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmulhrsw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmulhrsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmulhrsw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmulhrsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone - -define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pshufb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pshufb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pshufb: -; ATOM: # %bb.0: -; ATOM-NEXT: pshufb %xmm1, %xmm0 # sched: [4:2.00] -; ATOM-NEXT: pshufb (%rdi), %xmm0 # sched: [5:2.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pshufb: -; SLM: # %bb.0: -; SLM-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: pshufb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pshufb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pshufb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pshufb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pshufb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pshufb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pshufb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pshufb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pshufb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pshufb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00] -; SKX-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pshufb: -; SKX: # %bb.0: -; SKX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pshufb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [3:2.00] -; BDVER2-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [8:2.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pshufb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BDVER2-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pshufb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [2:2.00] -; BTVER2-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:2.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pshufb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; BTVER2-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pshufb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pshufb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone - -define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_psignb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psignb: -; ATOM: # %bb.0: -; ATOM-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psignb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psignb: -; SLM: # %bb.0: -; SLM-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psignb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psignb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psignb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psignb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psignb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psignb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psignb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psignb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psignb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psignb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psignb: -; SKX: # %bb.0: -; SKX-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psignb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psignb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psignb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psignb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psignb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psignb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone - -define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_psignd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psignd: -; ATOM: # %bb.0: -; ATOM-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psignd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psignd: -; SLM: # %bb.0: -; SLM-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psignd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psignd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psignd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psignd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psignd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psignd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psignd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psignd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psignd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psignd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psignd: -; SKX: # %bb.0: -; SKX-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psignd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psignd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psignd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psignd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psignd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psignd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %1, <4 x i32> %2) - ret <4 x i32> %3 -} -declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone - -define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psignw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psignw: -; ATOM: # %bb.0: -; ATOM-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psignw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psignw: -; SLM: # %bb.0: -; SLM-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psignw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psignw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psignw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psignw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psignw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psignw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psignw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psignw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psignw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psignw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psignw: -; SKX: # %bb.0: -; SKX-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psignw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psignw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psignw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psignw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psignw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psignw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone Index: test/CodeGen/X86/tbm-schedule.ll =================================================================== --- test/CodeGen/X86/tbm-schedule.ll +++ test/CodeGen/X86/tbm-schedule.ll @@ -1,773 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+tbm | FileCheck %s --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4 - -define i32 @test_x86_tbm_bextri_u32(i32 %a0, i32* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_bextri_u32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 -; GENERIC-NEXT: # sched: [2:1.00] -; GENERIC-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04 -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_bextri_u32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04 -; BDVER2-NEXT: # sched: [6:0.50] -; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 -; BDVER2-NEXT: # sched: [2:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_bextri_u32: -; BDVER3: # %bb.0: -; BDVER3-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 -; BDVER3-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04 -; BDVER3-NEXT: addl %ecx, %eax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_bextri_u32: -; BDVER4: # %bb.0: -; BDVER4-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 -; BDVER4-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04 -; BDVER4-NEXT: addl %ecx, %eax -; BDVER4-NEXT: retq - %a1 = load i32, i32* %p1 - %r0 = lshr i32 %a0, 4 - %m0 = lshr i32 %a1, 4 - %r1 = and i32 %r0, 4095 - %m1 = and i32 %m0, 4095 - %res = add i32 %r1, %m1 - ret i32 %res -} - -define i64 @test_x86_tbm_bextri_u64(i64 %a0, i64* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_bextri_u64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 -; GENERIC-NEXT: # sched: [2:1.00] -; GENERIC-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04 -; GENERIC-NEXT: # sched: [7:1.00] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_bextri_u64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04 -; BDVER2-NEXT: # sched: [6:0.50] -; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 -; BDVER2-NEXT: # sched: [2:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_bextri_u64: -; BDVER3: # %bb.0: -; BDVER3-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 -; BDVER3-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04 -; BDVER3-NEXT: addq %rcx, %rax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_bextri_u64: -; BDVER4: # %bb.0: -; BDVER4-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 -; BDVER4-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04 -; BDVER4-NEXT: addq %rcx, %rax -; BDVER4-NEXT: retq - %a1 = load i64, i64* %p1 - %r0 = lshr i64 %a0, 4 - %m0 = lshr i64 %a1, 4 - %r1 = and i64 %r0, 4095 - %m1 = and i64 %m0, 4095 - %res = add i64 %r1, %m1 - ret i64 %res -} - -define i32 @test_x86_tbm_blcfill_u32(i32 %a0, i32* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_blcfill_u32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blcfilll %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: blcfilll (%rsi), %eax # sched: [6:0.50] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_blcfill_u32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blcfilll (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: blcfilll %edi, %ecx # sched: [2:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_blcfill_u32: -; BDVER3: # %bb.0: -; BDVER3-NEXT: blcfilll %edi, %ecx -; BDVER3-NEXT: blcfilll (%rsi), %eax -; BDVER3-NEXT: addl %ecx, %eax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_blcfill_u32: -; BDVER4: # %bb.0: -; BDVER4-NEXT: blcfilll %edi, %ecx -; BDVER4-NEXT: blcfilll (%rsi), %eax -; BDVER4-NEXT: addl %ecx, %eax -; BDVER4-NEXT: retq - %a1 = load i32, i32* %p1 - %r0 = add i32 %a0, 1 - %m0 = add i32 %a1, 1 - %r1 = and i32 %r0, %a0 - %m1 = and i32 %m0, %a1 - %res = add i32 %r1, %m1 - ret i32 %res -} - -define i64 @test_x86_tbm_blcfill_u64(i64 %a0, i64* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_blcfill_u64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blcfillq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: blcfillq (%rsi), %rax # sched: [6:0.50] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_blcfill_u64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blcfillq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: blcfillq %rdi, %rcx # sched: [2:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_blcfill_u64: -; BDVER3: # %bb.0: -; BDVER3-NEXT: blcfillq %rdi, %rcx -; BDVER3-NEXT: blcfillq (%rsi), %rax -; BDVER3-NEXT: addq %rcx, %rax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_blcfill_u64: -; BDVER4: # %bb.0: -; BDVER4-NEXT: blcfillq %rdi, %rcx -; BDVER4-NEXT: blcfillq (%rsi), %rax -; BDVER4-NEXT: addq %rcx, %rax -; BDVER4-NEXT: retq - %a1 = load i64, i64* %p1 - %r0 = add i64 %a0, 1 - %m0 = add i64 %a1, 1 - %r1 = and i64 %r0, %a0 - %m1 = and i64 %m0, %a1 - %res = add i64 %r1, %m1 - ret i64 %res -} - -define i32 @test_x86_tbm_blci_u32(i32 %a0, i32* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_blci_u32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blcil %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: blcil (%rsi), %eax # sched: [6:0.50] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_blci_u32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blcil (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: blcil %edi, %ecx # sched: [2:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_blci_u32: -; BDVER3: # %bb.0: -; BDVER3-NEXT: blcil %edi, %ecx -; BDVER3-NEXT: blcil (%rsi), %eax -; BDVER3-NEXT: addl %ecx, %eax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_blci_u32: -; BDVER4: # %bb.0: -; BDVER4-NEXT: blcil %edi, %ecx -; BDVER4-NEXT: blcil (%rsi), %eax -; BDVER4-NEXT: addl %ecx, %eax -; BDVER4-NEXT: retq - %a1 = load i32, i32* %p1 - %r0 = add i32 1, %a0 - %m0 = add i32 1, %a1 - %r1 = xor i32 %r0, -1 - %m1 = xor i32 %m0, -1 - %r2 = or i32 %r1, %a0 - %m2 = or i32 %m1, %a1 - %res = add i32 %r2, %m2 - ret i32 %res -} - -define i64 @test_x86_tbm_blci_u64(i64 %a0, i64* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_blci_u64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blciq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: blciq (%rsi), %rax # sched: [6:0.50] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_blci_u64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blciq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: blciq %rdi, %rcx # sched: [2:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_blci_u64: -; BDVER3: # %bb.0: -; BDVER3-NEXT: blciq %rdi, %rcx -; BDVER3-NEXT: blciq (%rsi), %rax -; BDVER3-NEXT: addq %rcx, %rax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_blci_u64: -; BDVER4: # %bb.0: -; BDVER4-NEXT: blciq %rdi, %rcx -; BDVER4-NEXT: blciq (%rsi), %rax -; BDVER4-NEXT: addq %rcx, %rax -; BDVER4-NEXT: retq - %a1 = load i64, i64* %p1 - %r0 = add i64 1, %a0 - %m0 = add i64 1, %a1 - %r1 = xor i64 %r0, -1 - %m1 = xor i64 %m0, -1 - %r2 = or i64 %r1, %a0 - %m2 = or i64 %m1, %a1 - %res = add i64 %r2, %m2 - ret i64 %res -} - -define i32 @test_x86_tbm_blcic_u32(i32 %a0, i32* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_blcic_u32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blcicl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: blcicl (%rsi), %eax # sched: [6:0.50] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_blcic_u32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blcicl (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: blcicl %edi, %ecx # sched: [2:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_blcic_u32: -; BDVER3: # %bb.0: -; BDVER3-NEXT: blcicl %edi, %ecx -; BDVER3-NEXT: blcicl (%rsi), %eax -; BDVER3-NEXT: addl %ecx, %eax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_blcic_u32: -; BDVER4: # %bb.0: -; BDVER4-NEXT: blcicl %edi, %ecx -; BDVER4-NEXT: blcicl (%rsi), %eax -; BDVER4-NEXT: addl %ecx, %eax -; BDVER4-NEXT: retq - %a1 = load i32, i32* %p1 - %r0 = xor i32 %a0, -1 - %m0 = xor i32 %a1, -1 - %r1 = add i32 %a0, 1 - %m1 = add i32 %a1, 1 - %r2 = and i32 %r1, %r0 - %m2 = and i32 %m1, %m0 - %res = add i32 %r2, %m2 - ret i32 %res -} - -define i64 @test_x86_tbm_blcic_u64(i64 %a0, i64* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_blcic_u64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blcicq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: blcicq (%rsi), %rax # sched: [6:0.50] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_blcic_u64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blcicq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: blcicq %rdi, %rcx # sched: [2:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_blcic_u64: -; BDVER3: # %bb.0: -; BDVER3-NEXT: blcicq %rdi, %rcx -; BDVER3-NEXT: blcicq (%rsi), %rax -; BDVER3-NEXT: addq %rcx, %rax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_blcic_u64: -; BDVER4: # %bb.0: -; BDVER4-NEXT: blcicq %rdi, %rcx -; BDVER4-NEXT: blcicq (%rsi), %rax -; BDVER4-NEXT: addq %rcx, %rax -; BDVER4-NEXT: retq - %a1 = load i64, i64* %p1 - %r0 = xor i64 %a0, -1 - %m0 = xor i64 %a1, -1 - %r1 = add i64 %a0, 1 - %m1 = add i64 %a1, 1 - %r2 = and i64 %r1, %r0 - %m2 = and i64 %m1, %m0 - %res = add i64 %r2, %m2 - ret i64 %res -} - -define i32 @test_x86_tbm_blcmsk_u32(i32 %a0, i32* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_blcmsk_u32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blcmskl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: blcmskl (%rsi), %eax # sched: [6:0.50] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_blcmsk_u32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blcmskl (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: blcmskl %edi, %ecx # sched: [2:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_blcmsk_u32: -; BDVER3: # %bb.0: -; BDVER3-NEXT: blcmskl %edi, %ecx -; BDVER3-NEXT: blcmskl (%rsi), %eax -; BDVER3-NEXT: addl %ecx, %eax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_blcmsk_u32: -; BDVER4: # %bb.0: -; BDVER4-NEXT: blcmskl %edi, %ecx -; BDVER4-NEXT: blcmskl (%rsi), %eax -; BDVER4-NEXT: addl %ecx, %eax -; BDVER4-NEXT: retq - %a1 = load i32, i32* %p1 - %r0 = add i32 %a0, 1 - %m0 = add i32 %a1, 1 - %r1 = xor i32 %r0, %a0 - %m1 = xor i32 %m0, %a1 - %res = add i32 %r1, %m1 - ret i32 %res -} - -define i64 @test_x86_tbm_blcmsk_u64(i64 %a0, i64* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_blcmsk_u64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blcmskq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: blcmskq (%rsi), %rax # sched: [6:0.50] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_blcmsk_u64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blcmskq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: blcmskq %rdi, %rcx # sched: [2:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_blcmsk_u64: -; BDVER3: # %bb.0: -; BDVER3-NEXT: blcmskq %rdi, %rcx -; BDVER3-NEXT: blcmskq (%rsi), %rax -; BDVER3-NEXT: addq %rcx, %rax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_blcmsk_u64: -; BDVER4: # %bb.0: -; BDVER4-NEXT: blcmskq %rdi, %rcx -; BDVER4-NEXT: blcmskq (%rsi), %rax -; BDVER4-NEXT: addq %rcx, %rax -; BDVER4-NEXT: retq - %a1 = load i64, i64* %p1 - %r0 = add i64 %a0, 1 - %m0 = add i64 %a1, 1 - %r1 = xor i64 %r0, %a0 - %m1 = xor i64 %m0, %a1 - %res = add i64 %r1, %m1 - ret i64 %res -} - -define i32 @test_x86_tbm_blcs_u32(i32 %a0, i32* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_blcs_u32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blcsl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: blcsl (%rsi), %eax # sched: [6:0.50] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_blcs_u32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blcsl (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: blcsl %edi, %ecx # sched: [2:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_blcs_u32: -; BDVER3: # %bb.0: -; BDVER3-NEXT: blcsl %edi, %ecx -; BDVER3-NEXT: blcsl (%rsi), %eax -; BDVER3-NEXT: addl %ecx, %eax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_blcs_u32: -; BDVER4: # %bb.0: -; BDVER4-NEXT: blcsl %edi, %ecx -; BDVER4-NEXT: blcsl (%rsi), %eax -; BDVER4-NEXT: addl %ecx, %eax -; BDVER4-NEXT: retq - %a1 = load i32, i32* %p1 - %r0 = add i32 %a0, 1 - %m0 = add i32 %a1, 1 - %r1 = or i32 %r0, %a0 - %m1 = or i32 %m0, %a1 - %res = add i32 %r1, %m1 - ret i32 %res -} - -define i64 @test_x86_tbm_blcs_u64(i64 %a0, i64* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_blcs_u64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blcsq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: blcsq (%rsi), %rax # sched: [6:0.50] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_blcs_u64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blcsq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: blcsq %rdi, %rcx # sched: [2:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_blcs_u64: -; BDVER3: # %bb.0: -; BDVER3-NEXT: blcsq %rdi, %rcx -; BDVER3-NEXT: blcsq (%rsi), %rax -; BDVER3-NEXT: addq %rcx, %rax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_blcs_u64: -; BDVER4: # %bb.0: -; BDVER4-NEXT: blcsq %rdi, %rcx -; BDVER4-NEXT: blcsq (%rsi), %rax -; BDVER4-NEXT: addq %rcx, %rax -; BDVER4-NEXT: retq - %a1 = load i64, i64* %p1 - %r0 = add i64 %a0, 1 - %m0 = add i64 %a1, 1 - %r1 = or i64 %r0, %a0 - %m1 = or i64 %m0, %a1 - %res = add i64 %r1, %m1 - ret i64 %res -} - -define i32 @test_x86_tbm_blsfill_u32(i32 %a0, i32* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_blsfill_u32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blsfilll %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: blsfilll (%rsi), %eax # sched: [6:0.50] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_blsfill_u32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blsfilll (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: blsfilll %edi, %ecx # sched: [2:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_blsfill_u32: -; BDVER3: # %bb.0: -; BDVER3-NEXT: blsfilll %edi, %ecx -; BDVER3-NEXT: blsfilll (%rsi), %eax -; BDVER3-NEXT: addl %ecx, %eax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_blsfill_u32: -; BDVER4: # %bb.0: -; BDVER4-NEXT: blsfilll %edi, %ecx -; BDVER4-NEXT: blsfilll (%rsi), %eax -; BDVER4-NEXT: addl %ecx, %eax -; BDVER4-NEXT: retq - %a1 = load i32, i32* %p1 - %r0 = add i32 %a0, -1 - %m0 = add i32 %a1, -1 - %r1 = or i32 %r0, %a0 - %m1 = or i32 %m0, %a1 - %res = add i32 %r1, %m1 - ret i32 %res -} - -define i64 @test_x86_tbm_blsfill_u64(i64 %a0, i64* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_blsfill_u64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blsfillq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: blsfillq (%rsi), %rax # sched: [6:0.50] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_blsfill_u64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blsfillq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: blsfillq %rdi, %rcx # sched: [2:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_blsfill_u64: -; BDVER3: # %bb.0: -; BDVER3-NEXT: blsfillq %rdi, %rcx -; BDVER3-NEXT: blsfillq (%rsi), %rax -; BDVER3-NEXT: addq %rcx, %rax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_blsfill_u64: -; BDVER4: # %bb.0: -; BDVER4-NEXT: blsfillq %rdi, %rcx -; BDVER4-NEXT: blsfillq (%rsi), %rax -; BDVER4-NEXT: addq %rcx, %rax -; BDVER4-NEXT: retq - %a1 = load i64, i64* %p1 - %r0 = add i64 %a0, -1 - %m0 = add i64 %a1, -1 - %r1 = or i64 %r0, %a0 - %m1 = or i64 %m0, %a1 - %res = add i64 %r1, %m1 - ret i64 %res -} - -define i32 @test_x86_tbm_blsic_u32(i32 %a0, i32* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_blsic_u32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blsicl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: blsicl (%rsi), %eax # sched: [6:0.50] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_blsic_u32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blsicl (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: blsicl %edi, %ecx # sched: [2:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_blsic_u32: -; BDVER3: # %bb.0: -; BDVER3-NEXT: blsicl %edi, %ecx -; BDVER3-NEXT: blsicl (%rsi), %eax -; BDVER3-NEXT: addl %ecx, %eax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_blsic_u32: -; BDVER4: # %bb.0: -; BDVER4-NEXT: blsicl %edi, %ecx -; BDVER4-NEXT: blsicl (%rsi), %eax -; BDVER4-NEXT: addl %ecx, %eax -; BDVER4-NEXT: retq - %a1 = load i32, i32* %p1 - %r0 = xor i32 %a0, -1 - %m0 = xor i32 %a1, -1 - %r1 = add i32 %a0, -1 - %m1 = add i32 %a1, -1 - %r2 = or i32 %r0, %r1 - %m2 = or i32 %m0, %m1 - %res = add i32 %r2, %m2 - ret i32 %res -} - -define i64 @test_x86_tbm_blsic_u64(i64 %a0, i64* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_blsic_u64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: blsicq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: blsicq (%rsi), %rax # sched: [6:0.50] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_blsic_u64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: blsicq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: blsicq %rdi, %rcx # sched: [2:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_blsic_u64: -; BDVER3: # %bb.0: -; BDVER3-NEXT: blsicq %rdi, %rcx -; BDVER3-NEXT: blsicq (%rsi), %rax -; BDVER3-NEXT: addq %rcx, %rax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_blsic_u64: -; BDVER4: # %bb.0: -; BDVER4-NEXT: blsicq %rdi, %rcx -; BDVER4-NEXT: blsicq (%rsi), %rax -; BDVER4-NEXT: addq %rcx, %rax -; BDVER4-NEXT: retq - %a1 = load i64, i64* %p1 - %r0 = xor i64 %a0, -1 - %m0 = xor i64 %a1, -1 - %r1 = add i64 %a0, -1 - %m1 = add i64 %a1, -1 - %r2 = or i64 %r0, %r1 - %m2 = or i64 %m0, %m1 - %res = add i64 %r2, %m2 - ret i64 %res -} - -define i32 @test_x86_tbm_t1mskc_u32(i32 %a0, i32* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_t1mskc_u32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: t1mskcl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: t1mskcl (%rsi), %eax # sched: [6:0.50] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_t1mskc_u32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: t1mskcl (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: t1mskcl %edi, %ecx # sched: [2:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_t1mskc_u32: -; BDVER3: # %bb.0: -; BDVER3-NEXT: t1mskcl %edi, %ecx -; BDVER3-NEXT: t1mskcl (%rsi), %eax -; BDVER3-NEXT: addl %ecx, %eax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_t1mskc_u32: -; BDVER4: # %bb.0: -; BDVER4-NEXT: t1mskcl %edi, %ecx -; BDVER4-NEXT: t1mskcl (%rsi), %eax -; BDVER4-NEXT: addl %ecx, %eax -; BDVER4-NEXT: retq - %a1 = load i32, i32* %p1 - %r0 = xor i32 %a0, -1 - %m0 = xor i32 %a1, -1 - %r1 = add i32 %a0, 1 - %m1 = add i32 %a1, 1 - %r2 = or i32 %r0, %r1 - %m2 = or i32 %m0, %m1 - %res = add i32 %r2, %m2 - ret i32 %res -} - -define i64 @test_x86_tbm_t1mskc_u64(i64 %a0, i64* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_t1mskc_u64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: t1mskcq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: t1mskcq (%rsi), %rax # sched: [6:0.50] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_t1mskc_u64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: t1mskcq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: t1mskcq %rdi, %rcx # sched: [2:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_t1mskc_u64: -; BDVER3: # %bb.0: -; BDVER3-NEXT: t1mskcq %rdi, %rcx -; BDVER3-NEXT: t1mskcq (%rsi), %rax -; BDVER3-NEXT: addq %rcx, %rax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_t1mskc_u64: -; BDVER4: # %bb.0: -; BDVER4-NEXT: t1mskcq %rdi, %rcx -; BDVER4-NEXT: t1mskcq (%rsi), %rax -; BDVER4-NEXT: addq %rcx, %rax -; BDVER4-NEXT: retq - %a1 = load i64, i64* %p1 - %r0 = xor i64 %a0, -1 - %m0 = xor i64 %a1, -1 - %r1 = add i64 %a0, 1 - %m1 = add i64 %a1, 1 - %r2 = or i64 %r0, %r1 - %m2 = or i64 %m0, %m1 - %res = add i64 %r2, %m2 - ret i64 %res -} - -define i32 @test_x86_tbm_tzmsk_u32(i32 %a0, i32* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_tzmsk_u32: -; GENERIC: # %bb.0: -; GENERIC-NEXT: tzmskl %edi, %ecx # sched: [1:0.33] -; GENERIC-NEXT: tzmskl (%rsi), %eax # sched: [6:0.50] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_tzmsk_u32: -; BDVER2: # %bb.0: -; BDVER2-NEXT: tzmskl (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: tzmskl %edi, %ecx # sched: [2:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_tzmsk_u32: -; BDVER3: # %bb.0: -; BDVER3-NEXT: tzmskl %edi, %ecx -; BDVER3-NEXT: tzmskl (%rsi), %eax -; BDVER3-NEXT: addl %ecx, %eax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_tzmsk_u32: -; BDVER4: # %bb.0: -; BDVER4-NEXT: tzmskl %edi, %ecx -; BDVER4-NEXT: tzmskl (%rsi), %eax -; BDVER4-NEXT: addl %ecx, %eax -; BDVER4-NEXT: retq - %a1 = load i32, i32* %p1 - %r0 = xor i32 %a0, -1 - %m0 = xor i32 %a1, -1 - %r1 = add i32 %a0, -1 - %m1 = add i32 %a1, -1 - %r2 = and i32 %r0, %r1 - %m2 = and i32 %m0, %m1 - %res = add i32 %r2, %m2 - ret i32 %res -} - -define i64 @test_x86_tbm_tzmsk_u64(i64 %a0, i64* nocapture %p1) nounwind { -; GENERIC-LABEL: test_x86_tbm_tzmsk_u64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: tzmskq %rdi, %rcx # sched: [1:0.33] -; GENERIC-NEXT: tzmskq (%rsi), %rax # sched: [6:0.50] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER2-LABEL: test_x86_tbm_tzmsk_u64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: tzmskq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: tzmskq %rdi, %rcx # sched: [2:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_x86_tbm_tzmsk_u64: -; BDVER3: # %bb.0: -; BDVER3-NEXT: tzmskq %rdi, %rcx -; BDVER3-NEXT: tzmskq (%rsi), %rax -; BDVER3-NEXT: addq %rcx, %rax -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_x86_tbm_tzmsk_u64: -; BDVER4: # %bb.0: -; BDVER4-NEXT: tzmskq %rdi, %rcx -; BDVER4-NEXT: tzmskq (%rsi), %rax -; BDVER4-NEXT: addq %rcx, %rax -; BDVER4-NEXT: retq - %a1 = load i64, i64* %p1 - %r0 = xor i64 %a0, -1 - %m0 = xor i64 %a1, -1 - %r1 = add i64 %a0, -1 - %m1 = add i64 %a1, -1 - %r2 = and i64 %r0, %r1 - %m2 = and i64 %m0, %m1 - %res = add i64 %r2, %m2 - ret i64 %res -} Index: test/CodeGen/X86/x87-schedule.ll =================================================================== --- test/CodeGen/X86/x87-schedule.ll +++ test/CodeGen/X86/x87-schedule.ll @@ -1,6420 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=i686 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define void @test_f2xm1() optsize { -; GENERIC-LABEL: test_f2xm1: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: f2xm1 -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_f2xm1: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: f2xm1 # sched: [99:49.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_f2xm1: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: f2xm1 # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_f2xm1: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: f2xm1 # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_f2xm1: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: f2xm1 # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_f2xm1: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: f2xm1 # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_f2xm1: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: f2xm1 # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_f2xm1: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: f2xm1 # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_f2xm1: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: f2xm1 # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_f2xm1: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: f2xm1 # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_f2xm1: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: f2xm1 # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "f2xm1", ""() nounwind - ret void -} - -define void @test_fabs() optsize { -; GENERIC-LABEL: test_fabs: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fabs -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fabs: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fabs # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fabs: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fabs # sched: [1:0.50] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fabs: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fabs # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fabs: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fabs # sched: [1:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fabs: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fabs # sched: [1:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fabs: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fabs # sched: [1:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fabs: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fabs # sched: [1:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fabs: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fabs # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fabs: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fabs # sched: [2:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fabs: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fabs # sched: [2:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fabs", ""() nounwind - ret void -} - -define void @test_fadd(float *%a0, double *%a1) optsize { -; GENERIC-LABEL: test_fadd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fadd %st(0), %st(1) -; GENERIC-NEXT: fadd %st(2) -; GENERIC-NEXT: fadds (%ecx) -; GENERIC-NEXT: faddl (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fadd: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fadd %st(0), %st(1) # sched: [5:5.00] -; ATOM-NEXT: fadd %st(2) # sched: [5:5.00] -; ATOM-NEXT: fadds (%ecx) # sched: [5:5.00] -; ATOM-NEXT: faddl (%eax) # sched: [5:5.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fadd: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; SLM-NEXT: fadd %st(2) # sched: [3:1.00] -; SLM-NEXT: fadds (%ecx) # sched: [6:1.00] -; SLM-NEXT: faddl (%eax) # sched: [6:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fadd: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; SANDY-NEXT: fadd %st(2) # sched: [3:1.00] -; SANDY-NEXT: fadds (%ecx) # sched: [10:1.00] -; SANDY-NEXT: faddl (%eax) # sched: [10:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fadd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; HASWELL-NEXT: fadd %st(2) # sched: [3:1.00] -; HASWELL-NEXT: fadds (%ecx) # sched: [10:1.00] -; HASWELL-NEXT: faddl (%eax) # sched: [10:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fadd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; BROADWELL-NEXT: fadd %st(2) # sched: [3:1.00] -; BROADWELL-NEXT: fadds (%ecx) # sched: [9:1.00] -; BROADWELL-NEXT: faddl (%eax) # sched: [9:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fadd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; SKYLAKE-NEXT: fadd %st(2) # sched: [3:1.00] -; SKYLAKE-NEXT: fadds (%ecx) # sched: [10:1.00] -; SKYLAKE-NEXT: faddl (%eax) # sched: [10:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fadd: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; SKX-NEXT: fadd %st(2) # sched: [3:1.00] -; SKX-NEXT: fadds (%ecx) # sched: [10:1.00] -; SKX-NEXT: faddl (%eax) # sched: [10:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fadd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fadd %st(0), %st(1) # sched: [5:1.00] -; BDVER2-NEXT: fadd %st(2) # sched: [5:1.00] -; BDVER2-NEXT: fadds (%ecx) # sched: [10:1.00] -; BDVER2-NEXT: faddl (%eax) # sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fadd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; BTVER2-NEXT: fadd %st(2) # sched: [3:1.00] -; BTVER2-NEXT: fadds (%ecx) # sched: [8:1.00] -; BTVER2-NEXT: faddl (%eax) # sched: [8:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fadd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; ZNVER1-NEXT: fadd %st(2) # sched: [3:1.00] -; ZNVER1-NEXT: fadds (%ecx) # sched: [10:1.00] -; ZNVER1-NEXT: faddl (%eax) # sched: [10:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fadd %st(0), %st(1) \0A\09 fadd %st(2), %st(0) \0A\09 fadds $0 \0A\09 faddl $1", "*m,*m"(float *%a0, double *%a1) nounwind - ret void -} - -define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize { -; GENERIC-LABEL: test_faddp_fiadd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: faddp %st(1) -; GENERIC-NEXT: faddp %st(2) -; GENERIC-NEXT: fiadds (%ecx) -; GENERIC-NEXT: fiaddl (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_faddp_fiadd: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: faddp %st(1) # sched: [5:5.00] -; ATOM-NEXT: faddp %st(2) # sched: [5:5.00] -; ATOM-NEXT: fiadds (%ecx) # sched: [5:5.00] -; ATOM-NEXT: fiaddl (%eax) # sched: [5:5.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_faddp_fiadd: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: faddp %st(1) # sched: [3:1.00] -; SLM-NEXT: faddp %st(2) # sched: [3:1.00] -; SLM-NEXT: fiadds (%ecx) # sched: [6:1.00] -; SLM-NEXT: fiaddl (%eax) # sched: [6:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_faddp_fiadd: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: faddp %st(1) # sched: [3:1.00] -; SANDY-NEXT: faddp %st(2) # sched: [3:1.00] -; SANDY-NEXT: fiadds (%ecx) # sched: [13:2.00] -; SANDY-NEXT: fiaddl (%eax) # sched: [13:2.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_faddp_fiadd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: faddp %st(1) # sched: [3:1.00] -; HASWELL-NEXT: faddp %st(2) # sched: [3:1.00] -; HASWELL-NEXT: fiadds (%ecx) # sched: [13:2.00] -; HASWELL-NEXT: fiaddl (%eax) # sched: [13:2.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_faddp_fiadd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: faddp %st(1) # sched: [3:1.00] -; BROADWELL-NEXT: faddp %st(2) # sched: [3:1.00] -; BROADWELL-NEXT: fiadds (%ecx) # sched: [12:2.00] -; BROADWELL-NEXT: fiaddl (%eax) # sched: [12:2.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_faddp_fiadd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: faddp %st(1) # sched: [3:1.00] -; SKYLAKE-NEXT: faddp %st(2) # sched: [3:1.00] -; SKYLAKE-NEXT: fiadds (%ecx) # sched: [13:2.00] -; SKYLAKE-NEXT: fiaddl (%eax) # sched: [13:2.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_faddp_fiadd: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: faddp %st(1) # sched: [3:1.00] -; SKX-NEXT: faddp %st(2) # sched: [3:1.00] -; SKX-NEXT: fiadds (%ecx) # sched: [13:2.00] -; SKX-NEXT: fiaddl (%eax) # sched: [13:2.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_faddp_fiadd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: faddp %st(1) # sched: [5:1.00] -; BDVER2-NEXT: faddp %st(2) # sched: [5:1.00] -; BDVER2-NEXT: fiadds (%ecx) # sched: [10:1.00] -; BDVER2-NEXT: fiaddl (%eax) # sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_faddp_fiadd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: faddp %st(1) # sched: [3:1.00] -; BTVER2-NEXT: faddp %st(2) # sched: [3:1.00] -; BTVER2-NEXT: fiadds (%ecx) # sched: [8:1.00] -; BTVER2-NEXT: fiaddl (%eax) # sched: [8:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_faddp_fiadd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: faddp %st(1) # sched: [3:1.00] -; ZNVER1-NEXT: faddp %st(2) # sched: [3:1.00] -; ZNVER1-NEXT: fiadds (%ecx) # sched: [10:1.00] -; ZNVER1-NEXT: fiaddl (%eax) # sched: [10:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "faddp \0A\09 faddp %st(2), %st(0) \0A\09 fiadds $0 \0A\09 fiaddl $1", "*m,*m"(i16 *%a0, i32 *%a1) nounwind - ret void -} - -define void @test_fbld_fbstp(i8* %a0) optsize { -; GENERIC-LABEL: test_fbld_fbstp: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fbld (%eax) -; GENERIC-NEXT: fbstp (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fbld_fbstp: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fbld (%eax) # sched: [100:0.50] -; ATOM-NEXT: fbstp (%eax) # sched: [100:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fbld_fbstp: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fbld (%eax) # sched: [100:1.00] -; SLM-NEXT: fbstp (%eax) # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fbld_fbstp: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fbld (%eax) # sched: [100:0.33] -; SANDY-NEXT: fbstp (%eax) # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fbld_fbstp: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fbld (%eax) # sched: [47:10.75] -; HASWELL-NEXT: fbstp (%eax) # sched: [1:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fbld_fbstp: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fbld (%eax) # sched: [100:0.25] -; BROADWELL-NEXT: fbstp (%eax) # sched: [1:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fbld_fbstp: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fbld (%eax) # sched: [100:0.25] -; SKYLAKE-NEXT: fbstp (%eax) # sched: [1:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fbld_fbstp: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fbld (%eax) # sched: [100:0.25] -; SKX-NEXT: fbstp (%eax) # sched: [1:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fbld_fbstp: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fbld (%eax) # sched: [100:0.50] -; BDVER2-NEXT: fbstp (%eax) # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fbld_fbstp: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fbld (%eax) # sched: [100:0.50] -; BTVER2-NEXT: fbstp (%eax) # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fbld_fbstp: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fbld (%eax) # sched: [100:0.25] -; ZNVER1-NEXT: fbstp (%eax) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fbld $0 \0A\09 fbstp $0", "*m"(i8 *%a0) nounwind - ret void -} - -define void @test_fchs() optsize { -; GENERIC-LABEL: test_fchs: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fchs -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fchs: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fchs # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fchs: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fchs # sched: [1:0.50] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fchs: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fchs # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fchs: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fchs # sched: [1:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fchs: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fchs # sched: [1:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fchs: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fchs # sched: [1:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fchs: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fchs # sched: [1:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fchs: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fchs # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fchs: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fchs # sched: [2:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fchs: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fchs # sched: [1:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fchs", ""() nounwind - ret void -} - -define void @test_fclex() optsize { -; GENERIC-LABEL: test_fclex: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: wait -; GENERIC-NEXT: fnclex -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fclex: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: wait # sched: [1:0.50] -; ATOM-NEXT: fnclex # sched: [25:12.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fclex: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: wait # sched: [100:1.00] -; SLM-NEXT: fnclex # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fclex: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: wait # sched: [100:0.33] -; SANDY-NEXT: fnclex # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fclex: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: wait # sched: [2:0.50] -; HASWELL-NEXT: fnclex # sched: [4:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fclex: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: wait # sched: [2:0.50] -; BROADWELL-NEXT: fnclex # sched: [4:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fclex: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: wait # sched: [2:0.50] -; SKYLAKE-NEXT: fnclex # sched: [4:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fclex: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: wait # sched: [2:0.50] -; SKX-NEXT: fnclex # sched: [4:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fclex: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: wait # sched: [100:0.50] -; BDVER2-NEXT: fnclex # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fclex: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: wait # sched: [100:0.50] -; BTVER2-NEXT: fnclex # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fclex: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: wait # sched: [1:1.00] -; ZNVER1-NEXT: fnclex # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fclex", ""() nounwind - ret void -} - -define void @test_fnclex() optsize { -; GENERIC-LABEL: test_fnclex: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fnclex -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fnclex: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fnclex # sched: [25:12.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fnclex: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fnclex # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fnclex: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fnclex # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fnclex: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fnclex # sched: [4:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fnclex: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fnclex # sched: [4:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fnclex: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fnclex # sched: [4:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fnclex: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fnclex # sched: [4:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fnclex: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fnclex # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fnclex: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fnclex # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fnclex: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fnclex # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fnclex", ""() nounwind - ret void -} - -define void @test_fcmov() optsize { -; GENERIC-LABEL: test_fcmov: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fcmovb %st(1), %st(0) -; GENERIC-NEXT: fcmovbe %st(1), %st(0) -; GENERIC-NEXT: fcmove %st(1), %st(0) -; GENERIC-NEXT: fcmovnb %st(1), %st(0) -; GENERIC-NEXT: fcmovnbe %st(1), %st(0) -; GENERIC-NEXT: fcmovne %st(1), %st(0) -; GENERIC-NEXT: fcmovnu %st(1), %st(0) -; GENERIC-NEXT: fcmovu %st(1), %st(0) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fcmov: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fcmovb %st(1), %st(0) # sched: [9:4.50] -; ATOM-NEXT: fcmovbe %st(1), %st(0) # sched: [9:4.50] -; ATOM-NEXT: fcmove %st(1), %st(0) # sched: [9:4.50] -; ATOM-NEXT: fcmovnb %st(1), %st(0) # sched: [9:4.50] -; ATOM-NEXT: fcmovnbe %st(1), %st(0) # sched: [9:4.50] -; ATOM-NEXT: fcmovne %st(1), %st(0) # sched: [9:4.50] -; ATOM-NEXT: fcmovnu %st(1), %st(0) # sched: [9:4.50] -; ATOM-NEXT: fcmovu %st(1), %st(0) # sched: [9:4.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fcmov: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fcmovb %st(1), %st(0) # sched: [3:1.00] -; SLM-NEXT: fcmovbe %st(1), %st(0) # sched: [3:1.00] -; SLM-NEXT: fcmove %st(1), %st(0) # sched: [3:1.00] -; SLM-NEXT: fcmovnb %st(1), %st(0) # sched: [3:1.00] -; SLM-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:1.00] -; SLM-NEXT: fcmovne %st(1), %st(0) # sched: [3:1.00] -; SLM-NEXT: fcmovnu %st(1), %st(0) # sched: [3:1.00] -; SLM-NEXT: fcmovu %st(1), %st(0) # sched: [3:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fcmov: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fcmovb %st(1), %st(0) # sched: [3:2.00] -; SANDY-NEXT: fcmovbe %st(1), %st(0) # sched: [3:2.00] -; SANDY-NEXT: fcmove %st(1), %st(0) # sched: [3:2.00] -; SANDY-NEXT: fcmovnb %st(1), %st(0) # sched: [3:2.00] -; SANDY-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:2.00] -; SANDY-NEXT: fcmovne %st(1), %st(0) # sched: [3:2.00] -; SANDY-NEXT: fcmovnu %st(1), %st(0) # sched: [3:2.00] -; SANDY-NEXT: fcmovu %st(1), %st(0) # sched: [3:2.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fcmov: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fcmovb %st(1), %st(0) # sched: [3:1.00] -; HASWELL-NEXT: fcmovbe %st(1), %st(0) # sched: [3:1.00] -; HASWELL-NEXT: fcmove %st(1), %st(0) # sched: [3:1.00] -; HASWELL-NEXT: fcmovnb %st(1), %st(0) # sched: [3:1.00] -; HASWELL-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:1.00] -; HASWELL-NEXT: fcmovne %st(1), %st(0) # sched: [3:1.00] -; HASWELL-NEXT: fcmovnu %st(1), %st(0) # sched: [3:1.00] -; HASWELL-NEXT: fcmovu %st(1), %st(0) # sched: [3:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fcmov: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fcmovb %st(1), %st(0) # sched: [3:1.00] -; BROADWELL-NEXT: fcmovbe %st(1), %st(0) # sched: [3:1.00] -; BROADWELL-NEXT: fcmove %st(1), %st(0) # sched: [3:1.00] -; BROADWELL-NEXT: fcmovnb %st(1), %st(0) # sched: [3:1.00] -; BROADWELL-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:1.00] -; BROADWELL-NEXT: fcmovne %st(1), %st(0) # sched: [3:1.00] -; BROADWELL-NEXT: fcmovnu %st(1), %st(0) # sched: [3:1.00] -; BROADWELL-NEXT: fcmovu %st(1), %st(0) # sched: [3:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fcmov: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fcmovb %st(1), %st(0) # sched: [3:1.00] -; SKYLAKE-NEXT: fcmovbe %st(1), %st(0) # sched: [3:1.00] -; SKYLAKE-NEXT: fcmove %st(1), %st(0) # sched: [3:1.00] -; SKYLAKE-NEXT: fcmovnb %st(1), %st(0) # sched: [3:1.00] -; SKYLAKE-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:1.00] -; SKYLAKE-NEXT: fcmovne %st(1), %st(0) # sched: [3:1.00] -; SKYLAKE-NEXT: fcmovnu %st(1), %st(0) # sched: [3:1.00] -; SKYLAKE-NEXT: fcmovu %st(1), %st(0) # sched: [3:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fcmov: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fcmovb %st(1), %st(0) # sched: [3:1.00] -; SKX-NEXT: fcmovbe %st(1), %st(0) # sched: [3:1.00] -; SKX-NEXT: fcmove %st(1), %st(0) # sched: [3:1.00] -; SKX-NEXT: fcmovnb %st(1), %st(0) # sched: [3:1.00] -; SKX-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:1.00] -; SKX-NEXT: fcmovne %st(1), %st(0) # sched: [3:1.00] -; SKX-NEXT: fcmovnu %st(1), %st(0) # sched: [3:1.00] -; SKX-NEXT: fcmovu %st(1), %st(0) # sched: [3:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fcmov: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fcmovb %st(1), %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fcmovbe %st(1), %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fcmove %st(1), %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fcmovnb %st(1), %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fcmovnbe %st(1), %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fcmovne %st(1), %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fcmovnu %st(1), %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fcmovu %st(1), %st(0) # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fcmov: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fcmovb %st(1), %st(0) # sched: [3:1.00] -; BTVER2-NEXT: fcmovbe %st(1), %st(0) # sched: [3:1.00] -; BTVER2-NEXT: fcmove %st(1), %st(0) # sched: [3:1.00] -; BTVER2-NEXT: fcmovnb %st(1), %st(0) # sched: [3:1.00] -; BTVER2-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:1.00] -; BTVER2-NEXT: fcmovne %st(1), %st(0) # sched: [3:1.00] -; BTVER2-NEXT: fcmovnu %st(1), %st(0) # sched: [3:1.00] -; BTVER2-NEXT: fcmovu %st(1), %st(0) # sched: [3:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fcmov: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fcmovb %st(1), %st(0) # sched: [100:0.25] -; ZNVER1-NEXT: fcmovbe %st(1), %st(0) # sched: [100:0.25] -; ZNVER1-NEXT: fcmove %st(1), %st(0) # sched: [100:0.25] -; ZNVER1-NEXT: fcmovnb %st(1), %st(0) # sched: [100:0.25] -; ZNVER1-NEXT: fcmovnbe %st(1), %st(0) # sched: [100:0.25] -; ZNVER1-NEXT: fcmovne %st(1), %st(0) # sched: [100:0.25] -; ZNVER1-NEXT: fcmovnu %st(1), %st(0) # sched: [100:0.25] -; ZNVER1-NEXT: fcmovu %st(1), %st(0) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fcmovb %st(1), %st(0) \0A\09 fcmovbe %st(1), %st(0) \0A\09 fcmove %st(1), %st(0) \0A\09 fcmovnb %st(1), %st(0) \0A\09 fcmovnbe %st(1), %st(0) \0A\09 fcmovne %st(1), %st(0) \0A\09 fcmovnu %st(1), %st(0) \0A\09 fcmovu %st(1), %st(0)", ""() nounwind - ret void -} - -define void @test_fcom(float *%a0, double *%a1) optsize { -; GENERIC-LABEL: test_fcom: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fcom %st(1) -; GENERIC-NEXT: fcom %st(3) -; GENERIC-NEXT: fcoms (%ecx) -; GENERIC-NEXT: fcoml (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fcom: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fcom %st(1) # sched: [5:5.00] -; ATOM-NEXT: fcom %st(3) # sched: [5:5.00] -; ATOM-NEXT: fcoms (%ecx) # sched: [5:5.00] -; ATOM-NEXT: fcoml (%eax) # sched: [5:5.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fcom: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fcom %st(1) # sched: [3:1.00] -; SLM-NEXT: fcom %st(3) # sched: [3:1.00] -; SLM-NEXT: fcoms (%ecx) # sched: [6:1.00] -; SLM-NEXT: fcoml (%eax) # sched: [6:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fcom: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fcom %st(1) # sched: [1:1.00] -; SANDY-NEXT: fcom %st(3) # sched: [1:1.00] -; SANDY-NEXT: fcoms (%ecx) # sched: [8:1.00] -; SANDY-NEXT: fcoml (%eax) # sched: [8:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fcom: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fcom %st(1) # sched: [1:1.00] -; HASWELL-NEXT: fcom %st(3) # sched: [1:1.00] -; HASWELL-NEXT: fcoms (%ecx) # sched: [8:1.00] -; HASWELL-NEXT: fcoml (%eax) # sched: [8:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fcom: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fcom %st(1) # sched: [1:1.00] -; BROADWELL-NEXT: fcom %st(3) # sched: [1:1.00] -; BROADWELL-NEXT: fcoms (%ecx) # sched: [7:1.00] -; BROADWELL-NEXT: fcoml (%eax) # sched: [7:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fcom: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fcom %st(1) # sched: [1:1.00] -; SKYLAKE-NEXT: fcom %st(3) # sched: [1:1.00] -; SKYLAKE-NEXT: fcoms (%ecx) # sched: [8:1.00] -; SKYLAKE-NEXT: fcoml (%eax) # sched: [8:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fcom: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fcom %st(1) # sched: [1:1.00] -; SKX-NEXT: fcom %st(3) # sched: [1:1.00] -; SKX-NEXT: fcoms (%ecx) # sched: [8:1.00] -; SKX-NEXT: fcoml (%eax) # sched: [8:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fcom: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fcom %st(1) # sched: [1:1.00] -; BDVER2-NEXT: fcom %st(3) # sched: [1:1.00] -; BDVER2-NEXT: fcoms (%ecx) # sched: [6:1.00] -; BDVER2-NEXT: fcoml (%eax) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fcom: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fcom %st(1) # sched: [3:1.00] -; BTVER2-NEXT: fcom %st(3) # sched: [3:1.00] -; BTVER2-NEXT: fcoms (%ecx) # sched: [8:1.00] -; BTVER2-NEXT: fcoml (%eax) # sched: [8:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fcom: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fcom %st(1) # sched: [1:1.00] -; ZNVER1-NEXT: fcom %st(3) # sched: [1:1.00] -; ZNVER1-NEXT: fcoms (%ecx) # sched: [8:1.00] -; ZNVER1-NEXT: fcoml (%eax) # sched: [8:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fcom \0A\09 fcom %st(3) \0A\09 fcoms $0 \0A\09 fcoml $1", "*m,*m"(float *%a0, double *%a1) nounwind - ret void -} - -define void @test_fcomp_fcompp(float *%a0, double *%a1) optsize { -; GENERIC-LABEL: test_fcomp_fcompp: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fcomp %st(1) -; GENERIC-NEXT: fcomp %st(3) -; GENERIC-NEXT: fcomps (%ecx) -; GENERIC-NEXT: fcompl (%eax) -; GENERIC-NEXT: fcompp -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fcomp_fcompp: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fcomp %st(1) # sched: [5:5.00] -; ATOM-NEXT: fcomp %st(3) # sched: [5:5.00] -; ATOM-NEXT: fcomps (%ecx) # sched: [5:5.00] -; ATOM-NEXT: fcompl (%eax) # sched: [5:5.00] -; ATOM-NEXT: fcompp # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fcomp_fcompp: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fcomp %st(1) # sched: [3:1.00] -; SLM-NEXT: fcomp %st(3) # sched: [3:1.00] -; SLM-NEXT: fcomps (%ecx) # sched: [6:1.00] -; SLM-NEXT: fcompl (%eax) # sched: [6:1.00] -; SLM-NEXT: fcompp # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fcomp_fcompp: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fcomp %st(1) # sched: [1:1.00] -; SANDY-NEXT: fcomp %st(3) # sched: [1:1.00] -; SANDY-NEXT: fcomps (%ecx) # sched: [8:1.00] -; SANDY-NEXT: fcompl (%eax) # sched: [8:1.00] -; SANDY-NEXT: fcompp # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fcomp_fcompp: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fcomp %st(1) # sched: [1:1.00] -; HASWELL-NEXT: fcomp %st(3) # sched: [1:1.00] -; HASWELL-NEXT: fcomps (%ecx) # sched: [8:1.00] -; HASWELL-NEXT: fcompl (%eax) # sched: [8:1.00] -; HASWELL-NEXT: fcompp # sched: [1:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fcomp_fcompp: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fcomp %st(1) # sched: [1:1.00] -; BROADWELL-NEXT: fcomp %st(3) # sched: [1:1.00] -; BROADWELL-NEXT: fcomps (%ecx) # sched: [7:1.00] -; BROADWELL-NEXT: fcompl (%eax) # sched: [7:1.00] -; BROADWELL-NEXT: fcompp # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fcomp_fcompp: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fcomp %st(1) # sched: [1:1.00] -; SKYLAKE-NEXT: fcomp %st(3) # sched: [1:1.00] -; SKYLAKE-NEXT: fcomps (%ecx) # sched: [8:1.00] -; SKYLAKE-NEXT: fcompl (%eax) # sched: [8:1.00] -; SKYLAKE-NEXT: fcompp # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fcomp_fcompp: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fcomp %st(1) # sched: [1:1.00] -; SKX-NEXT: fcomp %st(3) # sched: [1:1.00] -; SKX-NEXT: fcomps (%ecx) # sched: [8:1.00] -; SKX-NEXT: fcompl (%eax) # sched: [8:1.00] -; SKX-NEXT: fcompp # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fcomp_fcompp: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fcomp %st(1) # sched: [1:1.00] -; BDVER2-NEXT: fcomp %st(3) # sched: [1:1.00] -; BDVER2-NEXT: fcomps (%ecx) # sched: [6:1.00] -; BDVER2-NEXT: fcompl (%eax) # sched: [6:1.00] -; BDVER2-NEXT: fcompp # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fcomp_fcompp: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fcomp %st(1) # sched: [3:1.00] -; BTVER2-NEXT: fcomp %st(3) # sched: [3:1.00] -; BTVER2-NEXT: fcomps (%ecx) # sched: [8:1.00] -; BTVER2-NEXT: fcompl (%eax) # sched: [8:1.00] -; BTVER2-NEXT: fcompp # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fcomp_fcompp: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fcomp %st(1) # sched: [1:1.00] -; ZNVER1-NEXT: fcomp %st(3) # sched: [1:1.00] -; ZNVER1-NEXT: fcomps (%ecx) # sched: [8:1.00] -; ZNVER1-NEXT: fcompl (%eax) # sched: [8:1.00] -; ZNVER1-NEXT: fcompp # sched: [1:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fcomp \0A\09 fcomp %st(3) \0A\09 fcomps $0 \0A\09 fcompl $1 \0A\09 fcompp", "*m,*m"(float *%a0, double *%a1) nounwind - ret void -} - -define void @test_fcomi_fcomip() optsize { -; GENERIC-LABEL: test_fcomi_fcomip: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fcomi %st(3) -; GENERIC-NEXT: fcompi %st(3) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fcomi_fcomip: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fcomi %st(3) # sched: [9:4.50] -; ATOM-NEXT: fcompi %st(3) # sched: [9:4.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fcomi_fcomip: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fcomi %st(3) # sched: [3:1.00] -; SLM-NEXT: fcompi %st(3) # sched: [3:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fcomi_fcomip: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fcomi %st(3) # sched: [3:1.00] -; SANDY-NEXT: fcompi %st(3) # sched: [3:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fcomi_fcomip: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fcomi %st(3) # sched: [1:0.50] -; HASWELL-NEXT: fcompi %st(3) # sched: [1:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fcomi_fcomip: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fcomi %st(3) # sched: [3:1.00] -; BROADWELL-NEXT: fcompi %st(3) # sched: [3:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fcomi_fcomip: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fcomi %st(3) # sched: [2:1.00] -; SKYLAKE-NEXT: fcompi %st(3) # sched: [2:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fcomi_fcomip: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fcomi %st(3) # sched: [2:1.00] -; SKX-NEXT: fcompi %st(3) # sched: [2:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fcomi_fcomip: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fcomi %st(3) # sched: [1:1.00] -; BDVER2-NEXT: fcompi %st(3) # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fcomi_fcomip: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fcomi %st(3) # sched: [3:1.00] -; BTVER2-NEXT: fcompi %st(3) # sched: [3:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fcomi_fcomip: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fcomi %st(3) # sched: [9:0.50] -; ZNVER1-NEXT: fcompi %st(3) # sched: [9:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fcomi %st(3) \0A\09 fcomip %st(3)", ""() nounwind - ret void -} - -define void @test_fcos() optsize { -; GENERIC-LABEL: test_fcos: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fcos -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fcos: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fcos # sched: [174:87.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fcos: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fcos # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fcos: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fcos # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fcos: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fcos # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fcos: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fcos # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fcos: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fcos # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fcos: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fcos # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fcos: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fcos # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fcos: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fcos # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fcos: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fcos # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fcos", ""() nounwind - ret void -} - -define void @test_fdecstp() optsize { -; GENERIC-LABEL: test_fdecstp: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fdecstp -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fdecstp: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fdecstp # sched: [1:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fdecstp: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fdecstp # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fdecstp: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fdecstp # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fdecstp: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fdecstp # sched: [2:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fdecstp: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fdecstp # sched: [2:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fdecstp: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fdecstp # sched: [2:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fdecstp: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fdecstp # sched: [2:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fdecstp: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fdecstp # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fdecstp: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fdecstp # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fdecstp: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fdecstp # sched: [11:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fdecstp", ""() nounwind - ret void -} - -define void @test_fdiv(float *%a0, double *%a1) optsize { -; GENERIC-LABEL: test_fdiv: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fdiv %st(0), %st(1) -; GENERIC-NEXT: fdiv %st(2) -; GENERIC-NEXT: fdivs (%ecx) -; GENERIC-NEXT: fdivl (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fdiv: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fdiv %st(0), %st(1) # sched: [34:17.00] -; ATOM-NEXT: fdiv %st(2) # sched: [34:17.00] -; ATOM-NEXT: fdivs (%ecx) # sched: [34:17.00] -; ATOM-NEXT: fdivl (%eax) # sched: [34:17.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fdiv: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fdiv %st(0), %st(1) # sched: [19:17.00] -; SLM-NEXT: fdiv %st(2) # sched: [19:17.00] -; SLM-NEXT: fdivs (%ecx) # sched: [22:17.00] -; SLM-NEXT: fdivl (%eax) # sched: [22:17.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fdiv: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fdiv %st(0), %st(1) # sched: [14:14.00] -; SANDY-NEXT: fdiv %st(2) # sched: [14:14.00] -; SANDY-NEXT: fdivs (%ecx) # sched: [31:1.00] -; SANDY-NEXT: fdivl (%eax) # sched: [31:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fdiv: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fdiv %st(0), %st(1) # sched: [24:1.00] -; HASWELL-NEXT: fdiv %st(2) # sched: [20:1.00] -; HASWELL-NEXT: fdivs (%ecx) # sched: [31:1.00] -; HASWELL-NEXT: fdivl (%eax) # sched: [31:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fdiv: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fdiv %st(0), %st(1) # sched: [15:1.00] -; BROADWELL-NEXT: fdiv %st(2) # sched: [20:1.00] -; BROADWELL-NEXT: fdivs (%ecx) # sched: [21:1.00] -; BROADWELL-NEXT: fdivl (%eax) # sched: [21:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fdiv: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fdiv %st(0), %st(1) # sched: [15:1.00] -; SKYLAKE-NEXT: fdiv %st(2) # sched: [20:1.00] -; SKYLAKE-NEXT: fdivs (%ecx) # sched: [22:1.00] -; SKYLAKE-NEXT: fdivl (%eax) # sched: [22:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fdiv: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fdiv %st(0), %st(1) # sched: [15:1.00] -; SKX-NEXT: fdiv %st(2) # sched: [20:1.00] -; SKX-NEXT: fdivs (%ecx) # sched: [22:1.00] -; SKX-NEXT: fdivl (%eax) # sched: [22:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fdiv: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fdiv %st(0), %st(1) # sched: [9:9.50] -; BDVER2-NEXT: fdiv %st(2) # sched: [9:9.50] -; BDVER2-NEXT: fdivs (%ecx) # sched: [14:9.50] -; BDVER2-NEXT: fdivl (%eax) # sched: [14:9.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fdiv: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fdiv %st(0), %st(1) # sched: [19:19.00] -; BTVER2-NEXT: fdiv %st(2) # sched: [19:19.00] -; BTVER2-NEXT: fdivs (%ecx) # sched: [24:19.00] -; BTVER2-NEXT: fdivl (%eax) # sched: [24:19.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fdiv: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fdiv %st(0), %st(1) # sched: [15:1.00] -; ZNVER1-NEXT: fdiv %st(2) # sched: [15:1.00] -; ZNVER1-NEXT: fdivs (%ecx) # sched: [22:1.00] -; ZNVER1-NEXT: fdivl (%eax) # sched: [22:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fdiv %st(0), %st(1) \0A\09 fdiv %st(2), %st(0) \0A\09 fdivs $0 \0A\09 fdivl $1", "*m,*m"(float *%a0, double *%a1) nounwind - ret void -} - -define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize { -; GENERIC-LABEL: test_fdivp_fidiv: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fdivp %st(1) -; GENERIC-NEXT: fdivp %st(2) -; GENERIC-NEXT: fidivs (%ecx) -; GENERIC-NEXT: fidivl (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fdivp_fidiv: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fdivp %st(1) # sched: [34:17.00] -; ATOM-NEXT: fdivp %st(2) # sched: [34:17.00] -; ATOM-NEXT: fidivs (%ecx) # sched: [34:17.00] -; ATOM-NEXT: fidivl (%eax) # sched: [34:17.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fdivp_fidiv: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fdivp %st(1) # sched: [19:17.00] -; SLM-NEXT: fdivp %st(2) # sched: [19:17.00] -; SLM-NEXT: fidivs (%ecx) # sched: [22:17.00] -; SLM-NEXT: fidivl (%eax) # sched: [22:17.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fdivp_fidiv: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fdivp %st(1) # sched: [14:14.00] -; SANDY-NEXT: fdivp %st(2) # sched: [14:14.00] -; SANDY-NEXT: fidivs (%ecx) # sched: [34:1.00] -; SANDY-NEXT: fidivl (%eax) # sched: [34:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fdivp_fidiv: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fdivp %st(1) # sched: [24:1.00] -; HASWELL-NEXT: fdivp %st(2) # sched: [24:1.00] -; HASWELL-NEXT: fidivs (%ecx) # sched: [34:1.00] -; HASWELL-NEXT: fidivl (%eax) # sched: [34:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fdivp_fidiv: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fdivp %st(1) # sched: [15:1.00] -; BROADWELL-NEXT: fdivp %st(2) # sched: [15:1.00] -; BROADWELL-NEXT: fidivs (%ecx) # sched: [24:1.00] -; BROADWELL-NEXT: fidivl (%eax) # sched: [24:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fdivp_fidiv: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fdivp %st(1) # sched: [15:1.00] -; SKYLAKE-NEXT: fdivp %st(2) # sched: [15:1.00] -; SKYLAKE-NEXT: fidivs (%ecx) # sched: [25:1.00] -; SKYLAKE-NEXT: fidivl (%eax) # sched: [25:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fdivp_fidiv: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fdivp %st(1) # sched: [15:1.00] -; SKX-NEXT: fdivp %st(2) # sched: [15:1.00] -; SKX-NEXT: fidivs (%ecx) # sched: [25:1.00] -; SKX-NEXT: fidivl (%eax) # sched: [25:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fdivp_fidiv: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fdivp %st(1) # sched: [9:9.50] -; BDVER2-NEXT: fdivp %st(2) # sched: [9:9.50] -; BDVER2-NEXT: fidivs (%ecx) # sched: [14:9.50] -; BDVER2-NEXT: fidivl (%eax) # sched: [14:9.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fdivp_fidiv: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fdivp %st(1) # sched: [19:19.00] -; BTVER2-NEXT: fdivp %st(2) # sched: [19:19.00] -; BTVER2-NEXT: fidivs (%ecx) # sched: [24:19.00] -; BTVER2-NEXT: fidivl (%eax) # sched: [24:19.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fdivp_fidiv: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fdivp %st(1) # sched: [15:1.00] -; ZNVER1-NEXT: fdivp %st(2) # sched: [15:1.00] -; ZNVER1-NEXT: fidivs (%ecx) # sched: [22:1.00] -; ZNVER1-NEXT: fidivl (%eax) # sched: [22:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fdivp \0A\09 fdivp %st(2), %st(0) \0A\09 fidivs $0 \0A\09 fidivl $1", "*m,*m"(i16 *%a0, i32 *%a1) nounwind - ret void -} - -define void @test_fdivr(float *%a0, double *%a1) optsize { -; GENERIC-LABEL: test_fdivr: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fdivr %st(0), %st(1) -; GENERIC-NEXT: fdivr %st(2) -; GENERIC-NEXT: fdivrs (%ecx) -; GENERIC-NEXT: fdivrl (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fdivr: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fdivr %st(0), %st(1) # sched: [34:17.00] -; ATOM-NEXT: fdivr %st(2) # sched: [34:17.00] -; ATOM-NEXT: fdivrs (%ecx) # sched: [34:17.00] -; ATOM-NEXT: fdivrl (%eax) # sched: [34:17.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fdivr: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fdivr %st(0), %st(1) # sched: [19:17.00] -; SLM-NEXT: fdivr %st(2) # sched: [19:17.00] -; SLM-NEXT: fdivrs (%ecx) # sched: [22:17.00] -; SLM-NEXT: fdivrl (%eax) # sched: [22:17.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fdivr: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fdivr %st(0), %st(1) # sched: [14:14.00] -; SANDY-NEXT: fdivr %st(2) # sched: [14:14.00] -; SANDY-NEXT: fdivrs (%ecx) # sched: [31:1.00] -; SANDY-NEXT: fdivrl (%eax) # sched: [31:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fdivr: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fdivr %st(0), %st(1) # sched: [20:1.00] -; HASWELL-NEXT: fdivr %st(2) # sched: [24:1.00] -; HASWELL-NEXT: fdivrs (%ecx) # sched: [27:1.00] -; HASWELL-NEXT: fdivrl (%eax) # sched: [27:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fdivr: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fdivr %st(0), %st(1) # sched: [20:1.00] -; BROADWELL-NEXT: fdivr %st(2) # sched: [15:1.00] -; BROADWELL-NEXT: fdivrs (%ecx) # sched: [26:1.00] -; BROADWELL-NEXT: fdivrl (%eax) # sched: [26:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fdivr: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fdivr %st(0), %st(1) # sched: [20:1.00] -; SKYLAKE-NEXT: fdivr %st(2) # sched: [15:1.00] -; SKYLAKE-NEXT: fdivrs (%ecx) # sched: [27:1.00] -; SKYLAKE-NEXT: fdivrl (%eax) # sched: [27:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fdivr: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fdivr %st(0), %st(1) # sched: [20:1.00] -; SKX-NEXT: fdivr %st(2) # sched: [15:1.00] -; SKX-NEXT: fdivrs (%ecx) # sched: [27:1.00] -; SKX-NEXT: fdivrl (%eax) # sched: [27:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fdivr: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fdivr %st(0), %st(1) # sched: [9:9.50] -; BDVER2-NEXT: fdivr %st(2) # sched: [9:9.50] -; BDVER2-NEXT: fdivrs (%ecx) # sched: [14:9.50] -; BDVER2-NEXT: fdivrl (%eax) # sched: [14:9.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fdivr: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fdivr %st(0), %st(1) # sched: [19:19.00] -; BTVER2-NEXT: fdivr %st(2) # sched: [19:19.00] -; BTVER2-NEXT: fdivrs (%ecx) # sched: [24:19.00] -; BTVER2-NEXT: fdivrl (%eax) # sched: [24:19.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fdivr: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fdivr %st(0), %st(1) # sched: [15:1.00] -; ZNVER1-NEXT: fdivr %st(2) # sched: [15:1.00] -; ZNVER1-NEXT: fdivrs (%ecx) # sched: [22:1.00] -; ZNVER1-NEXT: fdivrl (%eax) # sched: [22:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fdivr %st(0), %st(1) \0A\09 fdivr %st(2), %st(0) \0A\09 fdivrs $0 \0A\09 fdivrl $1", "*m,*m"(float *%a0, double *%a1) nounwind - ret void -} - -define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize { -; GENERIC-LABEL: test_fdivrp_fidivr: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fdivrp %st(1) -; GENERIC-NEXT: fdivrp %st(2) -; GENERIC-NEXT: fidivrs (%ecx) -; GENERIC-NEXT: fidivrl (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fdivrp_fidivr: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fdivrp %st(1) # sched: [34:17.00] -; ATOM-NEXT: fdivrp %st(2) # sched: [34:17.00] -; ATOM-NEXT: fidivrs (%ecx) # sched: [34:17.00] -; ATOM-NEXT: fidivrl (%eax) # sched: [34:17.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fdivrp_fidivr: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fdivrp %st(1) # sched: [19:17.00] -; SLM-NEXT: fdivrp %st(2) # sched: [19:17.00] -; SLM-NEXT: fidivrs (%ecx) # sched: [22:17.00] -; SLM-NEXT: fidivrl (%eax) # sched: [22:17.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fdivrp_fidivr: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fdivrp %st(1) # sched: [14:14.00] -; SANDY-NEXT: fdivrp %st(2) # sched: [14:14.00] -; SANDY-NEXT: fidivrs (%ecx) # sched: [34:1.00] -; SANDY-NEXT: fidivrl (%eax) # sched: [34:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fdivrp_fidivr: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fdivrp %st(1) # sched: [20:1.00] -; HASWELL-NEXT: fdivrp %st(2) # sched: [20:1.00] -; HASWELL-NEXT: fidivrs (%ecx) # sched: [30:1.00] -; HASWELL-NEXT: fidivrl (%eax) # sched: [30:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fdivrp_fidivr: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fdivrp %st(1) # sched: [20:1.00] -; BROADWELL-NEXT: fdivrp %st(2) # sched: [20:1.00] -; BROADWELL-NEXT: fidivrs (%ecx) # sched: [29:1.00] -; BROADWELL-NEXT: fidivrl (%eax) # sched: [29:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fdivrp_fidivr: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fdivrp %st(1) # sched: [20:1.00] -; SKYLAKE-NEXT: fdivrp %st(2) # sched: [20:1.00] -; SKYLAKE-NEXT: fidivrs (%ecx) # sched: [30:1.00] -; SKYLAKE-NEXT: fidivrl (%eax) # sched: [30:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fdivrp_fidivr: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fdivrp %st(1) # sched: [20:1.00] -; SKX-NEXT: fdivrp %st(2) # sched: [20:1.00] -; SKX-NEXT: fidivrs (%ecx) # sched: [30:1.00] -; SKX-NEXT: fidivrl (%eax) # sched: [30:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fdivrp_fidivr: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fdivrp %st(1) # sched: [9:9.50] -; BDVER2-NEXT: fdivrp %st(2) # sched: [9:9.50] -; BDVER2-NEXT: fidivrs (%ecx) # sched: [14:9.50] -; BDVER2-NEXT: fidivrl (%eax) # sched: [14:9.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fdivrp_fidivr: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fdivrp %st(1) # sched: [19:19.00] -; BTVER2-NEXT: fdivrp %st(2) # sched: [19:19.00] -; BTVER2-NEXT: fidivrs (%ecx) # sched: [24:19.00] -; BTVER2-NEXT: fidivrl (%eax) # sched: [24:19.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fdivrp_fidivr: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fdivrp %st(1) # sched: [15:1.00] -; ZNVER1-NEXT: fdivrp %st(2) # sched: [15:1.00] -; ZNVER1-NEXT: fidivrs (%ecx) # sched: [22:1.00] -; ZNVER1-NEXT: fidivrl (%eax) # sched: [22:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fdivrp \0A\09 fdivrp %st(2), %st(0) \0A\09 fidivrs $0 \0A\09 fidivrl $1", "*m,*m"(i16 *%a0, i32 *%a1) nounwind - ret void -} - -define void @test_ffree() optsize { -; GENERIC-LABEL: test_ffree: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: ffree %st(0) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_ffree: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: ffree %st(0) # sched: [1:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_ffree: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: ffree %st(0) # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_ffree: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: ffree %st(0) # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_ffree: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: ffree %st(0) # sched: [1:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ffree: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: ffree %st(0) # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_ffree: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: ffree %st(0) # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_ffree: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: ffree %st(0) # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_ffree: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: ffree %st(0) # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_ffree: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: ffree %st(0) # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ffree: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: ffree %st(0) # sched: [11:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "ffree %st(0)", ""() nounwind - ret void -} - -define void @test_ficom(i16 *%a0, i32 *%a1) optsize { -; GENERIC-LABEL: test_ficom: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: ficoms (%ecx) -; GENERIC-NEXT: ficoml (%eax) -; GENERIC-NEXT: ficomps (%ecx) -; GENERIC-NEXT: ficompl (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_ficom: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: ficoms (%ecx) # sched: [5:5.00] -; ATOM-NEXT: ficoml (%eax) # sched: [5:5.00] -; ATOM-NEXT: ficomps (%ecx) # sched: [5:5.00] -; ATOM-NEXT: ficompl (%eax) # sched: [5:5.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_ficom: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: ficoms (%ecx) # sched: [6:1.00] -; SLM-NEXT: ficoml (%eax) # sched: [6:1.00] -; SLM-NEXT: ficomps (%ecx) # sched: [6:1.00] -; SLM-NEXT: ficompl (%eax) # sched: [6:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_ficom: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: ficoms (%ecx) # sched: [11:2.00] -; SANDY-NEXT: ficoml (%eax) # sched: [11:2.00] -; SANDY-NEXT: ficomps (%ecx) # sched: [11:2.00] -; SANDY-NEXT: ficompl (%eax) # sched: [11:2.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_ficom: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: ficoms (%ecx) # sched: [11:2.00] -; HASWELL-NEXT: ficoml (%eax) # sched: [11:2.00] -; HASWELL-NEXT: ficomps (%ecx) # sched: [11:2.00] -; HASWELL-NEXT: ficompl (%eax) # sched: [11:2.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ficom: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: ficoms (%ecx) # sched: [10:2.00] -; BROADWELL-NEXT: ficoml (%eax) # sched: [10:2.00] -; BROADWELL-NEXT: ficomps (%ecx) # sched: [10:2.00] -; BROADWELL-NEXT: ficompl (%eax) # sched: [10:2.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_ficom: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: ficoms (%ecx) # sched: [11:2.00] -; SKYLAKE-NEXT: ficoml (%eax) # sched: [11:2.00] -; SKYLAKE-NEXT: ficomps (%ecx) # sched: [11:2.00] -; SKYLAKE-NEXT: ficompl (%eax) # sched: [11:2.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_ficom: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: ficoms (%ecx) # sched: [11:2.00] -; SKX-NEXT: ficoml (%eax) # sched: [11:2.00] -; SKX-NEXT: ficomps (%ecx) # sched: [11:2.00] -; SKX-NEXT: ficompl (%eax) # sched: [11:2.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_ficom: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: ficoms (%ecx) # sched: [6:1.00] -; BDVER2-NEXT: ficoml (%eax) # sched: [6:1.00] -; BDVER2-NEXT: ficomps (%ecx) # sched: [6:1.00] -; BDVER2-NEXT: ficompl (%eax) # sched: [6:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_ficom: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: ficoms (%ecx) # sched: [8:1.00] -; BTVER2-NEXT: ficoml (%eax) # sched: [8:1.00] -; BTVER2-NEXT: ficomps (%ecx) # sched: [8:1.00] -; BTVER2-NEXT: ficompl (%eax) # sched: [8:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ficom: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: ficoms (%ecx) # sched: [12:1.50] -; ZNVER1-NEXT: ficoml (%eax) # sched: [12:1.50] -; ZNVER1-NEXT: ficomps (%ecx) # sched: [12:1.50] -; ZNVER1-NEXT: ficompl (%eax) # sched: [12:1.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "ficoms $0 \0A\09 ficoml $1 \0A\09 ficomps $0 \0A\09 ficompl $1", "*m,*m"(i16 *%a0, i32 *%a1) nounwind - ret void -} - -define void @test_fild(i16 *%a0, i32 *%a1, i64 *%a2) optsize { -; GENERIC-LABEL: test_fild: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: filds (%edx) -; GENERIC-NEXT: fildl (%ecx) -; GENERIC-NEXT: fildll (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fild: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: filds (%edx) # sched: [5:5.00] -; ATOM-NEXT: fildl (%ecx) # sched: [5:5.00] -; ATOM-NEXT: fildll (%eax) # sched: [5:5.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fild: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: filds (%edx) # sched: [3:1.00] -; SLM-NEXT: fildl (%ecx) # sched: [3:1.00] -; SLM-NEXT: fildll (%eax) # sched: [3:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fild: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: filds (%edx) # sched: [10:1.00] -; SANDY-NEXT: fildl (%ecx) # sched: [10:1.00] -; SANDY-NEXT: fildll (%eax) # sched: [10:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fild: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: filds (%edx) # sched: [10:1.00] -; HASWELL-NEXT: fildl (%ecx) # sched: [10:1.00] -; HASWELL-NEXT: fildll (%eax) # sched: [10:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fild: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: filds (%edx) # sched: [9:1.00] -; BROADWELL-NEXT: fildl (%ecx) # sched: [9:1.00] -; BROADWELL-NEXT: fildll (%eax) # sched: [9:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fild: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: filds (%edx) # sched: [10:1.00] -; SKYLAKE-NEXT: fildl (%ecx) # sched: [10:1.00] -; SKYLAKE-NEXT: fildll (%eax) # sched: [10:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fild: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: filds (%edx) # sched: [10:1.00] -; SKX-NEXT: fildl (%ecx) # sched: [10:1.00] -; SKX-NEXT: fildll (%eax) # sched: [10:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fild: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: filds (%edx) # sched: [5:0.50] -; BDVER2-NEXT: fildl (%ecx) # sched: [5:0.50] -; BDVER2-NEXT: fildll (%eax) # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fild: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: filds (%edx) # sched: [3:1.00] -; BTVER2-NEXT: fildl (%ecx) # sched: [3:1.00] -; BTVER2-NEXT: fildll (%eax) # sched: [3:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fild: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: filds (%edx) # sched: [11:1.00] -; ZNVER1-NEXT: fildl (%ecx) # sched: [11:1.00] -; ZNVER1-NEXT: fildll (%eax) # sched: [11:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "filds $0 \0A\09 fildl $1 \0A\09 fildll $2", "*m,*m,*m"(i16 *%a0, i32 *%a1, i64 *%a2) nounwind - ret void -} - -define void @test_fincstp() optsize { -; GENERIC-LABEL: test_fincstp: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fincstp -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fincstp: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fincstp # sched: [1:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fincstp: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fincstp # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fincstp: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fincstp # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fincstp: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fincstp # sched: [1:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fincstp: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fincstp # sched: [1:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fincstp: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fincstp # sched: [1:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fincstp: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fincstp # sched: [1:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fincstp: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fincstp # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fincstp: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fincstp # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fincstp: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fincstp # sched: [11:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fincstp", ""() nounwind - ret void -} - -define void @test_finit() optsize { -; GENERIC-LABEL: test_finit: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: wait -; GENERIC-NEXT: fninit -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_finit: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: wait # sched: [1:0.50] -; ATOM-NEXT: fninit # sched: [63:31.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_finit: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: wait # sched: [100:1.00] -; SLM-NEXT: fninit # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_finit: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: wait # sched: [100:0.33] -; SANDY-NEXT: fninit # sched: [5:1.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_finit: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: wait # sched: [2:0.50] -; HASWELL-NEXT: fninit # sched: [75:6.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_finit: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: wait # sched: [2:0.50] -; BROADWELL-NEXT: fninit # sched: [75:6.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_finit: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: wait # sched: [2:0.50] -; SKYLAKE-NEXT: fninit # sched: [75:6.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_finit: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: wait # sched: [2:0.50] -; SKX-NEXT: fninit # sched: [75:6.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_finit: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: wait # sched: [100:0.50] -; BDVER2-NEXT: fninit # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_finit: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: wait # sched: [100:0.50] -; BTVER2-NEXT: fninit # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_finit: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: wait # sched: [1:1.00] -; ZNVER1-NEXT: fninit # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "finit", ""() nounwind - ret void -} - -define void @test_fninit() optsize { -; GENERIC-LABEL: test_fninit: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fninit -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fninit: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fninit # sched: [63:31.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fninit: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fninit # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fninit: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fninit # sched: [5:1.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fninit: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fninit # sched: [75:6.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fninit: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fninit # sched: [75:6.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fninit: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fninit # sched: [75:6.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fninit: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fninit # sched: [75:6.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fninit: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fninit # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fninit: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fninit # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fninit: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fninit # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fninit", ""() nounwind - ret void -} - -define void @test_fist_fistp_fisttp(i16* %a0, i32* %a1, i64 *%a2) optsize { -; GENERIC-LABEL: test_fist_fistp_fisttp: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fists (%edx) -; GENERIC-NEXT: fistl (%ecx) -; GENERIC-NEXT: fistps (%edx) -; GENERIC-NEXT: fistpl (%ecx) -; GENERIC-NEXT: fistpll (%eax) -; GENERIC-NEXT: fisttps (%edx) -; GENERIC-NEXT: fisttpl (%ecx) -; GENERIC-NEXT: fisttpll (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fist_fistp_fisttp: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fists (%edx) # sched: [6:3.00] -; ATOM-NEXT: fistl (%ecx) # sched: [6:3.00] -; ATOM-NEXT: fistps (%edx) # sched: [6:3.00] -; ATOM-NEXT: fistpl (%ecx) # sched: [6:3.00] -; ATOM-NEXT: fistpll (%eax) # sched: [6:3.00] -; ATOM-NEXT: fisttps (%edx) # sched: [2:1.00] -; ATOM-NEXT: fisttpl (%ecx) # sched: [2:1.00] -; ATOM-NEXT: fisttpll (%eax) # sched: [2:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fist_fistp_fisttp: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fists (%edx) # sched: [1:1.00] -; SLM-NEXT: fistl (%ecx) # sched: [1:1.00] -; SLM-NEXT: fistps (%edx) # sched: [1:1.00] -; SLM-NEXT: fistpl (%ecx) # sched: [1:1.00] -; SLM-NEXT: fistpll (%eax) # sched: [1:1.00] -; SLM-NEXT: fisttps (%edx) # sched: [1:1.00] -; SLM-NEXT: fisttpl (%ecx) # sched: [1:1.00] -; SLM-NEXT: fisttpll (%eax) # sched: [1:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fist_fistp_fisttp: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fists (%edx) # sched: [9:1.00] -; SANDY-NEXT: fistl (%ecx) # sched: [9:1.00] -; SANDY-NEXT: fistps (%edx) # sched: [9:1.00] -; SANDY-NEXT: fistpl (%ecx) # sched: [9:1.00] -; SANDY-NEXT: fistpll (%eax) # sched: [9:1.00] -; SANDY-NEXT: fisttps (%edx) # sched: [5:1.00] -; SANDY-NEXT: fisttpl (%ecx) # sched: [5:1.00] -; SANDY-NEXT: fisttpll (%eax) # sched: [5:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fist_fistp_fisttp: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fists (%edx) # sched: [4:1.00] -; HASWELL-NEXT: fistl (%ecx) # sched: [4:1.00] -; HASWELL-NEXT: fistps (%edx) # sched: [4:1.00] -; HASWELL-NEXT: fistpl (%ecx) # sched: [4:1.00] -; HASWELL-NEXT: fistpll (%eax) # sched: [4:1.00] -; HASWELL-NEXT: fisttps (%edx) # sched: [4:1.00] -; HASWELL-NEXT: fisttpl (%ecx) # sched: [4:1.00] -; HASWELL-NEXT: fisttpll (%eax) # sched: [4:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fist_fistp_fisttp: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fists (%edx) # sched: [4:1.00] -; BROADWELL-NEXT: fistl (%ecx) # sched: [4:1.00] -; BROADWELL-NEXT: fistps (%edx) # sched: [4:1.00] -; BROADWELL-NEXT: fistpl (%ecx) # sched: [4:1.00] -; BROADWELL-NEXT: fistpll (%eax) # sched: [4:1.00] -; BROADWELL-NEXT: fisttps (%edx) # sched: [4:1.00] -; BROADWELL-NEXT: fisttpl (%ecx) # sched: [4:1.00] -; BROADWELL-NEXT: fisttpll (%eax) # sched: [4:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fist_fistp_fisttp: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fists (%edx) # sched: [4:1.00] -; SKYLAKE-NEXT: fistl (%ecx) # sched: [4:1.00] -; SKYLAKE-NEXT: fistps (%edx) # sched: [4:1.00] -; SKYLAKE-NEXT: fistpl (%ecx) # sched: [4:1.00] -; SKYLAKE-NEXT: fistpll (%eax) # sched: [4:1.00] -; SKYLAKE-NEXT: fisttps (%edx) # sched: [4:1.00] -; SKYLAKE-NEXT: fisttpl (%ecx) # sched: [4:1.00] -; SKYLAKE-NEXT: fisttpll (%eax) # sched: [4:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fist_fistp_fisttp: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fists (%edx) # sched: [4:1.00] -; SKX-NEXT: fistl (%ecx) # sched: [4:1.00] -; SKX-NEXT: fistps (%edx) # sched: [4:1.00] -; SKX-NEXT: fistpl (%ecx) # sched: [4:1.00] -; SKX-NEXT: fistpll (%eax) # sched: [4:1.00] -; SKX-NEXT: fisttps (%edx) # sched: [4:1.00] -; SKX-NEXT: fisttpl (%ecx) # sched: [4:1.00] -; SKX-NEXT: fisttpll (%eax) # sched: [4:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fist_fistp_fisttp: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fists (%edx) # sched: [1:1.00] -; BDVER2-NEXT: fistl (%ecx) # sched: [1:1.00] -; BDVER2-NEXT: fistps (%edx) # sched: [1:1.00] -; BDVER2-NEXT: fistpl (%ecx) # sched: [1:1.00] -; BDVER2-NEXT: fistpll (%eax) # sched: [1:1.00] -; BDVER2-NEXT: fisttps (%edx) # sched: [1:1.00] -; BDVER2-NEXT: fisttpl (%ecx) # sched: [1:1.00] -; BDVER2-NEXT: fisttpll (%eax) # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fist_fistp_fisttp: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fists (%edx) # sched: [1:1.00] -; BTVER2-NEXT: fistl (%ecx) # sched: [1:1.00] -; BTVER2-NEXT: fistps (%edx) # sched: [1:1.00] -; BTVER2-NEXT: fistpl (%ecx) # sched: [1:1.00] -; BTVER2-NEXT: fistpll (%eax) # sched: [1:1.00] -; BTVER2-NEXT: fisttps (%edx) # sched: [1:1.00] -; BTVER2-NEXT: fisttpl (%ecx) # sched: [1:1.00] -; BTVER2-NEXT: fisttpll (%eax) # sched: [1:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fist_fistp_fisttp: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fists (%edx) # sched: [12:0.50] -; ZNVER1-NEXT: fistl (%ecx) # sched: [12:0.50] -; ZNVER1-NEXT: fistps (%edx) # sched: [12:0.50] -; ZNVER1-NEXT: fistpl (%ecx) # sched: [12:0.50] -; ZNVER1-NEXT: fistpll (%eax) # sched: [12:0.50] -; ZNVER1-NEXT: fisttps (%edx) # sched: [12:0.50] -; ZNVER1-NEXT: fisttpl (%ecx) # sched: [12:0.50] -; ZNVER1-NEXT: fisttpll (%eax) # sched: [12:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fists $0 \0A\09 fistl $1 \0A\09 fistps $0 \0A\09 fistpl $1 \0A\09 fistpll $2 \0A\09 fisttps $0 \0A\09 fisttpl $1 \0A\09 fisttpll $2", "*m,*m,*m"(i16* %a0, i32* %a1, i64 *%a2) nounwind - ret void -} - -define void @test_fld(i16* %a0, i32* %a1, i64 *%a2) optsize { -; GENERIC-LABEL: test_fld: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fld %st(0) -; GENERIC-NEXT: flds (%edx) -; GENERIC-NEXT: fldl (%ecx) -; GENERIC-NEXT: fldt (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fld: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fld %st(0) # sched: [1:1.00] -; ATOM-NEXT: flds (%edx) # sched: [1:1.00] -; ATOM-NEXT: fldl (%ecx) # sched: [1:1.00] -; ATOM-NEXT: fldt (%eax) # sched: [4:2.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fld: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fld %st(0) # sched: [1:0.50] -; SLM-NEXT: flds (%edx) # sched: [3:1.00] -; SLM-NEXT: fldl (%ecx) # sched: [3:1.00] -; SLM-NEXT: fldt (%eax) # sched: [3:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fld: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fld %st(0) # sched: [1:1.00] -; SANDY-NEXT: flds (%edx) # sched: [9:1.00] -; SANDY-NEXT: fldl (%ecx) # sched: [9:1.00] -; SANDY-NEXT: fldt (%eax) # sched: [9:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fld: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fld %st(0) # sched: [1:0.50] -; HASWELL-NEXT: flds (%edx) # sched: [7:0.50] -; HASWELL-NEXT: fldl (%ecx) # sched: [7:0.50] -; HASWELL-NEXT: fldt (%eax) # sched: [7:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fld: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fld %st(0) # sched: [1:0.25] -; BROADWELL-NEXT: flds (%edx) # sched: [6:0.50] -; BROADWELL-NEXT: fldl (%ecx) # sched: [6:0.50] -; BROADWELL-NEXT: fldt (%eax) # sched: [6:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fld: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fld %st(0) # sched: [1:0.25] -; SKYLAKE-NEXT: flds (%edx) # sched: [7:0.50] -; SKYLAKE-NEXT: fldl (%ecx) # sched: [7:0.50] -; SKYLAKE-NEXT: fldt (%eax) # sched: [7:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fld: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fld %st(0) # sched: [1:0.25] -; SKX-NEXT: flds (%edx) # sched: [7:0.50] -; SKX-NEXT: fldl (%ecx) # sched: [7:0.50] -; SKX-NEXT: fldt (%eax) # sched: [7:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fld: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fld %st(0) # sched: [1:0.50] -; BDVER2-NEXT: flds (%edx) # sched: [5:0.50] -; BDVER2-NEXT: fldl (%ecx) # sched: [5:0.50] -; BDVER2-NEXT: fldt (%eax) # sched: [5:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fld: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fld %st(0) # sched: [1:0.50] -; BTVER2-NEXT: flds (%edx) # sched: [3:1.00] -; BTVER2-NEXT: fldl (%ecx) # sched: [3:1.00] -; BTVER2-NEXT: fldt (%eax) # sched: [3:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fld: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fld %st(0) # sched: [1:0.50] -; ZNVER1-NEXT: flds (%edx) # sched: [8:0.50] -; ZNVER1-NEXT: fldl (%ecx) # sched: [8:0.50] -; ZNVER1-NEXT: fldt (%eax) # sched: [1:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fld %st(0) \0A\09 flds $0 \0A\09 fldl $1 \0A\09 fldt $2", "*m,*m,*m"(i16* %a0, i32* %a1, i64 *%a2) nounwind - ret void -} - -define void @test_fldcw_fldenv(i8* %a0) optsize { -; GENERIC-LABEL: test_fldcw_fldenv: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fldcw (%eax) -; GENERIC-NEXT: fldenv (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fldcw_fldenv: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fldcw (%eax) # sched: [5:2.50] -; ATOM-NEXT: fldenv (%eax) # sched: [100:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fldcw_fldenv: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fldcw (%eax) # sched: [3:1.00] -; SLM-NEXT: fldenv (%eax) # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fldcw_fldenv: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fldcw (%eax) # sched: [8:2.00] -; SANDY-NEXT: fldenv (%eax) # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fldcw_fldenv: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fldcw (%eax) # sched: [7:1.00] -; HASWELL-NEXT: fldenv (%eax) # sched: [61:14.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fldcw_fldenv: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fldcw (%eax) # sched: [7:1.00] -; BROADWELL-NEXT: fldenv (%eax) # sched: [60:14.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fldcw_fldenv: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fldcw (%eax) # sched: [7:1.00] -; SKYLAKE-NEXT: fldenv (%eax) # sched: [62:14.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fldcw_fldenv: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fldcw (%eax) # sched: [7:1.00] -; SKX-NEXT: fldenv (%eax) # sched: [62:14.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fldcw_fldenv: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fldcw (%eax) # sched: [5:0.50] -; BDVER2-NEXT: fldenv (%eax) # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fldcw_fldenv: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fldcw (%eax) # sched: [3:1.00] -; BTVER2-NEXT: fldenv (%eax) # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fldcw_fldenv: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fldcw (%eax) # sched: [100:0.25] -; ZNVER1-NEXT: fldenv (%eax) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fldcw $0 \0A\09 fldenv $0", "*m"(i8* %a0) nounwind - ret void -} - -define void @test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz() optsize { -; GENERIC-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fld1 -; GENERIC-NEXT: fldl2e -; GENERIC-NEXT: fldl2t -; GENERIC-NEXT: fldlg2 -; GENERIC-NEXT: fldln2 -; GENERIC-NEXT: fldpi -; GENERIC-NEXT: fldz -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fld1 # sched: [6:3.00] -; ATOM-NEXT: fldl2e # sched: [10:5.00] -; ATOM-NEXT: fldl2t # sched: [10:5.00] -; ATOM-NEXT: fldlg2 # sched: [10:5.00] -; ATOM-NEXT: fldln2 # sched: [10:5.00] -; ATOM-NEXT: fldpi # sched: [10:5.00] -; ATOM-NEXT: fldz # sched: [1:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fld1 # sched: [1:0.50] -; SLM-NEXT: fldl2e # sched: [1:1.00] -; SLM-NEXT: fldl2t # sched: [1:1.00] -; SLM-NEXT: fldlg2 # sched: [1:1.00] -; SLM-NEXT: fldln2 # sched: [1:1.00] -; SLM-NEXT: fldpi # sched: [1:1.00] -; SLM-NEXT: fldz # sched: [1:0.50] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fld1 # sched: [1:1.00] -; SANDY-NEXT: fldl2e # sched: [1:1.00] -; SANDY-NEXT: fldl2t # sched: [1:1.00] -; SANDY-NEXT: fldlg2 # sched: [1:1.00] -; SANDY-NEXT: fldln2 # sched: [1:1.00] -; SANDY-NEXT: fldpi # sched: [1:1.00] -; SANDY-NEXT: fldz # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fld1 # sched: [1:1.00] -; HASWELL-NEXT: fldl2e # sched: [1:1.00] -; HASWELL-NEXT: fldl2t # sched: [1:1.00] -; HASWELL-NEXT: fldlg2 # sched: [1:1.00] -; HASWELL-NEXT: fldln2 # sched: [1:1.00] -; HASWELL-NEXT: fldpi # sched: [1:1.00] -; HASWELL-NEXT: fldz # sched: [1:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fld1 # sched: [1:1.00] -; BROADWELL-NEXT: fldl2e # sched: [1:1.00] -; BROADWELL-NEXT: fldl2t # sched: [1:1.00] -; BROADWELL-NEXT: fldlg2 # sched: [1:1.00] -; BROADWELL-NEXT: fldln2 # sched: [1:1.00] -; BROADWELL-NEXT: fldpi # sched: [1:1.00] -; BROADWELL-NEXT: fldz # sched: [1:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fld1 # sched: [1:1.00] -; SKYLAKE-NEXT: fldl2e # sched: [1:1.00] -; SKYLAKE-NEXT: fldl2t # sched: [1:1.00] -; SKYLAKE-NEXT: fldlg2 # sched: [1:1.00] -; SKYLAKE-NEXT: fldln2 # sched: [1:1.00] -; SKYLAKE-NEXT: fldpi # sched: [1:1.00] -; SKYLAKE-NEXT: fldz # sched: [1:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fld1 # sched: [1:1.00] -; SKX-NEXT: fldl2e # sched: [1:1.00] -; SKX-NEXT: fldl2t # sched: [1:1.00] -; SKX-NEXT: fldlg2 # sched: [1:1.00] -; SKX-NEXT: fldln2 # sched: [1:1.00] -; SKX-NEXT: fldpi # sched: [1:1.00] -; SKX-NEXT: fldz # sched: [1:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fld1 # sched: [3:1.00] -; BDVER2-NEXT: fldl2e # sched: [3:1.00] -; BDVER2-NEXT: fldl2t # sched: [3:1.00] -; BDVER2-NEXT: fldlg2 # sched: [3:1.00] -; BDVER2-NEXT: fldln2 # sched: [3:1.00] -; BDVER2-NEXT: fldpi # sched: [3:1.00] -; BDVER2-NEXT: fldz # sched: [3:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fld1 # sched: [3:1.00] -; BTVER2-NEXT: fldl2e # sched: [3:1.00] -; BTVER2-NEXT: fldl2t # sched: [3:1.00] -; BTVER2-NEXT: fldlg2 # sched: [3:1.00] -; BTVER2-NEXT: fldln2 # sched: [3:1.00] -; BTVER2-NEXT: fldpi # sched: [3:1.00] -; BTVER2-NEXT: fldz # sched: [3:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fld1 # sched: [11:1.00] -; ZNVER1-NEXT: fldl2e # sched: [11:1.00] -; ZNVER1-NEXT: fldl2t # sched: [11:1.00] -; ZNVER1-NEXT: fldlg2 # sched: [11:1.00] -; ZNVER1-NEXT: fldln2 # sched: [11:1.00] -; ZNVER1-NEXT: fldpi # sched: [11:1.00] -; ZNVER1-NEXT: fldz # sched: [8:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fld1 \0A\09 fldl2e \0A\09 fldl2t \0A\09 fldlg2 \0A\09 fldln2 \0A\09 fldpi \0A\09 fldz", ""() nounwind - ret void -} - -define void @test_fmul(float *%a0, double *%a1) optsize { -; GENERIC-LABEL: test_fmul: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fmul %st(0), %st(1) -; GENERIC-NEXT: fmul %st(2) -; GENERIC-NEXT: fmuls (%ecx) -; GENERIC-NEXT: fmull (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fmul: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fmul %st(0), %st(1) # sched: [4:4.00] -; ATOM-NEXT: fmul %st(2) # sched: [4:4.00] -; ATOM-NEXT: fmuls (%ecx) # sched: [4:4.00] -; ATOM-NEXT: fmull (%eax) # sched: [4:4.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fmul: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fmul %st(0), %st(1) # sched: [5:2.00] -; SLM-NEXT: fmul %st(2) # sched: [5:2.00] -; SLM-NEXT: fmuls (%ecx) # sched: [8:2.00] -; SLM-NEXT: fmull (%eax) # sched: [8:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fmul: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fmul %st(0), %st(1) # sched: [5:1.00] -; SANDY-NEXT: fmul %st(2) # sched: [5:1.00] -; SANDY-NEXT: fmuls (%ecx) # sched: [12:1.00] -; SANDY-NEXT: fmull (%eax) # sched: [12:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fmul: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fmul %st(0), %st(1) # sched: [5:1.00] -; HASWELL-NEXT: fmul %st(2) # sched: [5:1.00] -; HASWELL-NEXT: fmuls (%ecx) # sched: [12:1.00] -; HASWELL-NEXT: fmull (%eax) # sched: [12:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fmul: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fmul %st(0), %st(1) # sched: [5:1.00] -; BROADWELL-NEXT: fmul %st(2) # sched: [5:1.00] -; BROADWELL-NEXT: fmuls (%ecx) # sched: [11:1.00] -; BROADWELL-NEXT: fmull (%eax) # sched: [11:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fmul: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fmul %st(0), %st(1) # sched: [4:1.00] -; SKYLAKE-NEXT: fmul %st(2) # sched: [4:1.00] -; SKYLAKE-NEXT: fmuls (%ecx) # sched: [11:1.00] -; SKYLAKE-NEXT: fmull (%eax) # sched: [11:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fmul: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fmul %st(0), %st(1) # sched: [4:1.00] -; SKX-NEXT: fmul %st(2) # sched: [4:1.00] -; SKX-NEXT: fmuls (%ecx) # sched: [11:1.00] -; SKX-NEXT: fmull (%eax) # sched: [11:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fmul: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fmul %st(0), %st(1) # sched: [5:1.00] -; BDVER2-NEXT: fmul %st(2) # sched: [5:1.00] -; BDVER2-NEXT: fmuls (%ecx) # sched: [10:1.00] -; BDVER2-NEXT: fmull (%eax) # sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fmul: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fmul %st(0), %st(1) # sched: [2:1.00] -; BTVER2-NEXT: fmul %st(2) # sched: [2:1.00] -; BTVER2-NEXT: fmuls (%ecx) # sched: [7:1.00] -; BTVER2-NEXT: fmull (%eax) # sched: [7:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fmul: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fmul %st(0), %st(1) # sched: [3:0.50] -; ZNVER1-NEXT: fmul %st(2) # sched: [3:0.50] -; ZNVER1-NEXT: fmuls (%ecx) # sched: [10:0.50] -; ZNVER1-NEXT: fmull (%eax) # sched: [10:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fmul %st(0), %st(1) \0A\09 fmul %st(2), %st(0) \0A\09 fmuls $0 \0A\09 fmull $1", "*m,*m"(float *%a0, double *%a1) nounwind - ret void -} - -define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize { -; GENERIC-LABEL: test_fmulp_fimul: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fmulp %st(1) -; GENERIC-NEXT: fmulp %st(2) -; GENERIC-NEXT: fimuls (%ecx) -; GENERIC-NEXT: fimull (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fmulp_fimul: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fmulp %st(1) # sched: [4:4.00] -; ATOM-NEXT: fmulp %st(2) # sched: [4:4.00] -; ATOM-NEXT: fimuls (%ecx) # sched: [4:4.00] -; ATOM-NEXT: fimull (%eax) # sched: [4:4.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fmulp_fimul: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fmulp %st(1) # sched: [5:2.00] -; SLM-NEXT: fmulp %st(2) # sched: [5:2.00] -; SLM-NEXT: fimuls (%ecx) # sched: [8:2.00] -; SLM-NEXT: fimull (%eax) # sched: [8:2.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fmulp_fimul: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fmulp %st(1) # sched: [5:1.00] -; SANDY-NEXT: fmulp %st(2) # sched: [5:1.00] -; SANDY-NEXT: fimuls (%ecx) # sched: [15:1.00] -; SANDY-NEXT: fimull (%eax) # sched: [15:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fmulp_fimul: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fmulp %st(1) # sched: [5:1.00] -; HASWELL-NEXT: fmulp %st(2) # sched: [5:1.00] -; HASWELL-NEXT: fimuls (%ecx) # sched: [15:1.00] -; HASWELL-NEXT: fimull (%eax) # sched: [15:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fmulp_fimul: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fmulp %st(1) # sched: [5:1.00] -; BROADWELL-NEXT: fmulp %st(2) # sched: [5:1.00] -; BROADWELL-NEXT: fimuls (%ecx) # sched: [14:1.00] -; BROADWELL-NEXT: fimull (%eax) # sched: [14:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fmulp_fimul: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fmulp %st(1) # sched: [4:1.00] -; SKYLAKE-NEXT: fmulp %st(2) # sched: [4:1.00] -; SKYLAKE-NEXT: fimuls (%ecx) # sched: [14:1.00] -; SKYLAKE-NEXT: fimull (%eax) # sched: [14:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fmulp_fimul: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fmulp %st(1) # sched: [4:1.00] -; SKX-NEXT: fmulp %st(2) # sched: [4:1.00] -; SKX-NEXT: fimuls (%ecx) # sched: [14:1.00] -; SKX-NEXT: fimull (%eax) # sched: [14:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fmulp_fimul: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fmulp %st(1) # sched: [5:1.00] -; BDVER2-NEXT: fmulp %st(2) # sched: [5:1.00] -; BDVER2-NEXT: fimuls (%ecx) # sched: [10:1.00] -; BDVER2-NEXT: fimull (%eax) # sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fmulp_fimul: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fmulp %st(1) # sched: [2:1.00] -; BTVER2-NEXT: fmulp %st(2) # sched: [2:1.00] -; BTVER2-NEXT: fimuls (%ecx) # sched: [7:1.00] -; BTVER2-NEXT: fimull (%eax) # sched: [7:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fmulp_fimul: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fmulp %st(1) # sched: [3:0.50] -; ZNVER1-NEXT: fmulp %st(2) # sched: [3:0.50] -; ZNVER1-NEXT: fimuls (%ecx) # sched: [10:0.50] -; ZNVER1-NEXT: fimull (%eax) # sched: [10:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fmulp \0A\09 fmulp %st(2), %st(0) \0A\09 fimuls $0 \0A\09 fimull $1", "*m,*m"(i16 *%a0, i32 *%a1) nounwind - ret void -} - -define void @test_fnop() optsize { -; GENERIC-LABEL: test_fnop: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fnop -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fnop: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fnop # sched: [1:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fnop: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fnop # sched: [1:0.50] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fnop: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fnop # sched: [1:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fnop: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fnop # sched: [1:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fnop: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fnop # sched: [1:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fnop: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fnop # sched: [1:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fnop: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fnop # sched: [1:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fnop: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fnop # sched: [1:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fnop: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fnop # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fnop: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fnop # sched: [1:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fnop", ""() nounwind - ret void -} - -define void @test_fpatan() optsize { -; GENERIC-LABEL: test_fpatan: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fpatan -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fpatan: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fpatan # sched: [183:91.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fpatan: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fpatan # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fpatan: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fpatan # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fpatan: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fpatan # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fpatan: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fpatan # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fpatan: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fpatan # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fpatan: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fpatan # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fpatan: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fpatan # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fpatan: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fpatan # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fpatan: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fpatan # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fpatan", ""() nounwind - ret void -} - -define void @test_fprem_fprem1() optsize { -; GENERIC-LABEL: test_fprem_fprem1: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fprem -; GENERIC-NEXT: fprem1 -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fprem_fprem1: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fprem # sched: [55:27.50] -; ATOM-NEXT: fprem1 # sched: [71:35.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fprem_fprem1: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fprem # sched: [100:1.00] -; SLM-NEXT: fprem1 # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fprem_fprem1: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fprem # sched: [100:0.33] -; SANDY-NEXT: fprem1 # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fprem_fprem1: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fprem # sched: [19:7.00] -; HASWELL-NEXT: fprem1 # sched: [27:10.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fprem_fprem1: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fprem # sched: [100:0.25] -; BROADWELL-NEXT: fprem1 # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fprem_fprem1: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fprem # sched: [100:0.25] -; SKYLAKE-NEXT: fprem1 # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fprem_fprem1: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fprem # sched: [100:0.25] -; SKX-NEXT: fprem1 # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fprem_fprem1: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fprem # sched: [100:0.50] -; BDVER2-NEXT: fprem1 # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fprem_fprem1: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fprem # sched: [100:0.50] -; BTVER2-NEXT: fprem1 # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fprem_fprem1: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fprem # sched: [100:0.25] -; ZNVER1-NEXT: fprem1 # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fprem \0A\09 fprem1", ""() nounwind - ret void -} - -define void @test_fptan() optsize { -; GENERIC-LABEL: test_fptan: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fptan -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fptan: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fptan # sched: [168:84.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fptan: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fptan # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fptan: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fptan # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fptan: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fptan # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fptan: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fptan # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fptan: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fptan # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fptan: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fptan # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fptan: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fptan # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fptan: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fptan # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fptan: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fptan # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fptan", ""() nounwind - ret void -} - -define void @test_frndint() optsize { -; GENERIC-LABEL: test_frndint: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: frndint -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_frndint: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: frndint # sched: [46:23.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_frndint: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: frndint # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_frndint: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: frndint # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_frndint: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: frndint # sched: [11:4.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_frndint: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: frndint # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_frndint: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: frndint # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_frndint: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: frndint # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_frndint: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: frndint # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_frndint: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: frndint # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_frndint: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: frndint # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "frndint", ""() nounwind - ret void -} - -define void @test_frstor(i8* %a0) optsize { -; GENERIC-LABEL: test_frstor: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: #APP -; GENERIC-NEXT: frstor (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_frstor: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: frstor (%eax) # sched: [100:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_frstor: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: frstor (%eax) # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_frstor: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: frstor (%eax) # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_frstor: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: frstor (%eax) # sched: [1:22.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_frstor: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: frstor (%eax) # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_frstor: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: frstor (%eax) # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_frstor: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: frstor (%eax) # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_frstor: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: frstor (%eax) # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_frstor: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: frstor (%eax) # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_frstor: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: frstor (%eax) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "frstor $0", "*m"(i8* %a0) nounwind - ret void -} - -define void @test_fsave(i8* %a0) optsize { -; GENERIC-LABEL: test_fsave: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: #APP -; GENERIC-NEXT: wait -; GENERIC-NEXT: fnsave (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fsave: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: wait # sched: [1:0.50] -; ATOM-NEXT: fnsave (%eax) # sched: [100:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fsave: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: wait # sched: [100:1.00] -; SLM-NEXT: fnsave (%eax) # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fsave: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: wait # sched: [100:0.33] -; SANDY-NEXT: fnsave (%eax) # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fsave: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: wait # sched: [2:0.50] -; HASWELL-NEXT: fnsave (%eax) # sched: [1:36.75] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fsave: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: wait # sched: [2:0.50] -; BROADWELL-NEXT: fnsave (%eax) # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fsave: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: wait # sched: [2:0.50] -; SKYLAKE-NEXT: fnsave (%eax) # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fsave: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: wait # sched: [2:0.50] -; SKX-NEXT: fnsave (%eax) # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fsave: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: wait # sched: [100:0.50] -; BDVER2-NEXT: fnsave (%eax) # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fsave: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: wait # sched: [100:0.50] -; BTVER2-NEXT: fnsave (%eax) # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fsave: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: wait # sched: [1:1.00] -; ZNVER1-NEXT: fnsave (%eax) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fsave $0", "*m"(i8* %a0) nounwind - ret void -} - -define void @test_fnsave(i8* %a0) optsize { -; GENERIC-LABEL: test_fnsave: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fnsave (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fnsave: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fnsave (%eax) # sched: [100:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fnsave: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fnsave (%eax) # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fnsave: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fnsave (%eax) # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fnsave: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fnsave (%eax) # sched: [1:36.75] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fnsave: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fnsave (%eax) # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fnsave: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fnsave (%eax) # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fnsave: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fnsave (%eax) # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fnsave: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fnsave (%eax) # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fnsave: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fnsave (%eax) # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fnsave: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fnsave (%eax) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fnsave $0", "*m"(i8* %a0) nounwind - ret void -} - -define void @test_fscale() optsize { -; GENERIC-LABEL: test_fscale: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fscale -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fscale: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fscale # sched: [77:38.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fscale: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fscale # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fscale: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fscale # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fscale: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fscale # sched: [75:12.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fscale: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fscale # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fscale: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fscale # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fscale: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fscale # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fscale: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fscale # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fscale: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fscale # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fscale: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fscale # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fscale", ""() nounwind - ret void -} - -define void @test_fsin() optsize { -; GENERIC-LABEL: test_fsin: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fsin -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fsin: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fsin # sched: [174:87.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fsin: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fsin # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fsin: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fsin # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fsin: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fsin # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fsin: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fsin # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fsin: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fsin # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fsin: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fsin # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fsin: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsin # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fsin: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fsin # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fsin: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fsin # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fsin", ""() nounwind - ret void -} - -define void @test_fsincos() optsize { -; GENERIC-LABEL: test_fsincos: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fsincos -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fsincos: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fsincos # sched: [174:87.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fsincos: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fsincos # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fsincos: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fsincos # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fsincos: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fsincos # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fsincos: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fsincos # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fsincos: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fsincos # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fsincos: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fsincos # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fsincos: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsincos # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fsincos: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fsincos # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fsincos: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fsincos # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fsincos", ""() nounwind - ret void -} - -define void @test_fsqrt() optsize { -; GENERIC-LABEL: test_fsqrt: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fsqrt -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fsqrt: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fsqrt # sched: [71:35.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fsqrt: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fsqrt # sched: [40:40.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fsqrt: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fsqrt # sched: [24:24.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fsqrt: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fsqrt # sched: [23:17.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fsqrt: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fsqrt # sched: [23:9.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fsqrt: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fsqrt # sched: [21:7.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fsqrt: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fsqrt # sched: [21:7.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fsqrt: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsqrt # sched: [1:17.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fsqrt: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fsqrt # sched: [35:35.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fsqrt: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fsqrt # sched: [20:20.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fsqrt", ""() nounwind - ret void -} - -define void @test_fst_fstp(i16* %a0, i32* %a1, i64 *%a2) optsize { -; GENERIC-LABEL: test_fst_fstp: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fst %st(0) -; GENERIC-NEXT: fsts (%edx) -; GENERIC-NEXT: fstl (%ecx) -; GENERIC-NEXT: fstp %st(0) -; GENERIC-NEXT: fstpl (%edx) -; GENERIC-NEXT: fstpl (%ecx) -; GENERIC-NEXT: fstpt (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fst_fstp: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fst %st(0) # sched: [2:1.00] -; ATOM-NEXT: fsts (%edx) # sched: [2:1.00] -; ATOM-NEXT: fstl (%ecx) # sched: [2:1.00] -; ATOM-NEXT: fstp %st(0) # sched: [2:1.00] -; ATOM-NEXT: fstpl (%edx) # sched: [2:1.00] -; ATOM-NEXT: fstpl (%ecx) # sched: [2:1.00] -; ATOM-NEXT: fstpt (%eax) # sched: [5:2.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fst_fstp: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fst %st(0) # sched: [1:0.50] -; SLM-NEXT: fsts (%edx) # sched: [1:1.00] -; SLM-NEXT: fstl (%ecx) # sched: [1:1.00] -; SLM-NEXT: fstp %st(0) # sched: [1:0.50] -; SLM-NEXT: fstpl (%edx) # sched: [1:1.00] -; SLM-NEXT: fstpl (%ecx) # sched: [1:1.00] -; SLM-NEXT: fstpt (%eax) # sched: [1:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fst_fstp: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fst %st(0) # sched: [1:1.00] -; SANDY-NEXT: fsts (%edx) # sched: [6:1.00] -; SANDY-NEXT: fstl (%ecx) # sched: [6:1.00] -; SANDY-NEXT: fstp %st(0) # sched: [1:1.00] -; SANDY-NEXT: fstpl (%edx) # sched: [6:1.00] -; SANDY-NEXT: fstpl (%ecx) # sched: [6:1.00] -; SANDY-NEXT: fstpt (%eax) # sched: [6:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fst_fstp: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fst %st(0) # sched: [1:0.50] -; HASWELL-NEXT: fsts (%edx) # sched: [1:1.00] -; HASWELL-NEXT: fstl (%ecx) # sched: [1:1.00] -; HASWELL-NEXT: fstp %st(0) # sched: [1:0.50] -; HASWELL-NEXT: fstpl (%edx) # sched: [1:1.00] -; HASWELL-NEXT: fstpl (%ecx) # sched: [1:1.00] -; HASWELL-NEXT: fstpt (%eax) # sched: [1:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fst_fstp: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fst %st(0) # sched: [1:0.25] -; BROADWELL-NEXT: fsts (%edx) # sched: [1:1.00] -; BROADWELL-NEXT: fstl (%ecx) # sched: [1:1.00] -; BROADWELL-NEXT: fstp %st(0) # sched: [1:0.25] -; BROADWELL-NEXT: fstpl (%edx) # sched: [1:1.00] -; BROADWELL-NEXT: fstpl (%ecx) # sched: [1:1.00] -; BROADWELL-NEXT: fstpt (%eax) # sched: [1:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fst_fstp: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fst %st(0) # sched: [1:0.25] -; SKYLAKE-NEXT: fsts (%edx) # sched: [1:1.00] -; SKYLAKE-NEXT: fstl (%ecx) # sched: [1:1.00] -; SKYLAKE-NEXT: fstp %st(0) # sched: [1:0.25] -; SKYLAKE-NEXT: fstpl (%edx) # sched: [1:1.00] -; SKYLAKE-NEXT: fstpl (%ecx) # sched: [1:1.00] -; SKYLAKE-NEXT: fstpt (%eax) # sched: [1:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fst_fstp: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fst %st(0) # sched: [1:0.25] -; SKX-NEXT: fsts (%edx) # sched: [1:1.00] -; SKX-NEXT: fstl (%ecx) # sched: [1:1.00] -; SKX-NEXT: fstp %st(0) # sched: [1:0.25] -; SKX-NEXT: fstpl (%edx) # sched: [1:1.00] -; SKX-NEXT: fstpl (%ecx) # sched: [1:1.00] -; SKX-NEXT: fstpt (%eax) # sched: [1:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fst_fstp: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fst %st(0) # sched: [1:0.50] -; BDVER2-NEXT: fsts (%edx) # sched: [1:1.00] -; BDVER2-NEXT: fstl (%ecx) # sched: [1:1.00] -; BDVER2-NEXT: fstp %st(0) # sched: [1:0.50] -; BDVER2-NEXT: fstpl (%edx) # sched: [1:1.00] -; BDVER2-NEXT: fstpl (%ecx) # sched: [1:1.00] -; BDVER2-NEXT: fstpt (%eax) # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fst_fstp: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fst %st(0) # sched: [1:0.50] -; BTVER2-NEXT: fsts (%edx) # sched: [1:1.00] -; BTVER2-NEXT: fstl (%ecx) # sched: [1:1.00] -; BTVER2-NEXT: fstp %st(0) # sched: [1:0.50] -; BTVER2-NEXT: fstpl (%edx) # sched: [1:1.00] -; BTVER2-NEXT: fstpl (%ecx) # sched: [1:1.00] -; BTVER2-NEXT: fstpt (%eax) # sched: [1:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fst_fstp: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fst %st(0) # sched: [5:0.50] -; ZNVER1-NEXT: fsts (%edx) # sched: [1:0.50] -; ZNVER1-NEXT: fstl (%ecx) # sched: [1:0.50] -; ZNVER1-NEXT: fstp %st(0) # sched: [5:0.50] -; ZNVER1-NEXT: fstpl (%edx) # sched: [1:0.50] -; ZNVER1-NEXT: fstpl (%ecx) # sched: [1:0.50] -; ZNVER1-NEXT: fstpt (%eax) # sched: [5:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fst %st(0) \0A\09 fsts $0 \0A\09 fstl $1 \0A\09 fstp %st(0) \0A\09 fstpl $0 \0A\09 fstpl $1 \0A\09 fstpt $2", "*m,*m,*m"(i16* %a0, i32* %a1, i64 *%a2) nounwind - ret void -} - -define void @test_fstcw_fstenv_fstsw(i8* %a0) optsize { -; GENERIC-LABEL: test_fstcw_fstenv_fstsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: #APP -; GENERIC-NEXT: wait -; GENERIC-NEXT: fnstcw (%eax) -; GENERIC-NEXT: wait -; GENERIC-NEXT: fnstenv (%eax) -; GENERIC-NEXT: wait -; GENERIC-NEXT: fnstsw (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fstcw_fstenv_fstsw: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: wait # sched: [1:0.50] -; ATOM-NEXT: fnstcw (%eax) # sched: [8:4.00] -; ATOM-NEXT: wait # sched: [1:0.50] -; ATOM-NEXT: fnstenv (%eax) # sched: [100:0.50] -; ATOM-NEXT: wait # sched: [1:0.50] -; ATOM-NEXT: fnstsw (%eax) # sched: [100:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fstcw_fstenv_fstsw: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: wait # sched: [100:1.00] -; SLM-NEXT: fnstcw (%eax) # sched: [1:0.50] -; SLM-NEXT: wait # sched: [100:1.00] -; SLM-NEXT: fnstenv (%eax) # sched: [100:1.00] -; SLM-NEXT: wait # sched: [100:1.00] -; SLM-NEXT: fnstsw (%eax) # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fstcw_fstenv_fstsw: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: wait # sched: [100:0.33] -; SANDY-NEXT: fnstcw (%eax) # sched: [7:1.00] -; SANDY-NEXT: wait # sched: [100:0.33] -; SANDY-NEXT: fnstenv (%eax) # sched: [100:0.33] -; SANDY-NEXT: wait # sched: [100:0.33] -; SANDY-NEXT: fnstsw (%eax) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fstcw_fstenv_fstsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: wait # sched: [2:0.50] -; HASWELL-NEXT: fnstcw (%eax) # sched: [2:1.00] -; HASWELL-NEXT: wait # sched: [2:0.50] -; HASWELL-NEXT: fnstenv (%eax) # sched: [115:19.50] -; HASWELL-NEXT: wait # sched: [2:0.50] -; HASWELL-NEXT: fnstsw (%eax) # sched: [4:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fstcw_fstenv_fstsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: wait # sched: [2:0.50] -; BROADWELL-NEXT: fnstcw (%eax) # sched: [2:1.00] -; BROADWELL-NEXT: wait # sched: [2:0.50] -; BROADWELL-NEXT: fnstenv (%eax) # sched: [115:19.50] -; BROADWELL-NEXT: wait # sched: [2:0.50] -; BROADWELL-NEXT: fnstsw (%eax) # sched: [4:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fstcw_fstenv_fstsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: wait # sched: [2:0.50] -; SKYLAKE-NEXT: fnstcw (%eax) # sched: [2:1.00] -; SKYLAKE-NEXT: wait # sched: [2:0.50] -; SKYLAKE-NEXT: fnstenv (%eax) # sched: [106:19.50] -; SKYLAKE-NEXT: wait # sched: [2:0.50] -; SKYLAKE-NEXT: fnstsw (%eax) # sched: [3:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fstcw_fstenv_fstsw: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: wait # sched: [2:0.50] -; SKX-NEXT: fnstcw (%eax) # sched: [2:1.00] -; SKX-NEXT: wait # sched: [2:0.50] -; SKX-NEXT: fnstenv (%eax) # sched: [106:19.50] -; SKX-NEXT: wait # sched: [2:0.50] -; SKX-NEXT: fnstsw (%eax) # sched: [3:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fstcw_fstenv_fstsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: wait # sched: [100:0.50] -; BDVER2-NEXT: fnstcw (%eax) # sched: [1:0.50] -; BDVER2-NEXT: wait # sched: [100:0.50] -; BDVER2-NEXT: fnstenv (%eax) # sched: [100:0.50] -; BDVER2-NEXT: wait # sched: [100:0.50] -; BDVER2-NEXT: fnstsw (%eax) # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fstcw_fstenv_fstsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: wait # sched: [100:0.50] -; BTVER2-NEXT: fnstcw (%eax) # sched: [1:0.50] -; BTVER2-NEXT: wait # sched: [100:0.50] -; BTVER2-NEXT: fnstenv (%eax) # sched: [100:0.50] -; BTVER2-NEXT: wait # sched: [100:0.50] -; BTVER2-NEXT: fnstsw (%eax) # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fstcw_fstenv_fstsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: wait # sched: [1:1.00] -; ZNVER1-NEXT: fnstcw (%eax) # sched: [100:0.25] -; ZNVER1-NEXT: wait # sched: [1:1.00] -; ZNVER1-NEXT: fnstenv (%eax) # sched: [100:0.25] -; ZNVER1-NEXT: wait # sched: [1:1.00] -; ZNVER1-NEXT: fnstsw (%eax) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fstcw $0 \0A\09 fstenv $0 \0A\09 fstsw $0", "*m"(i8* %a0) nounwind - ret void -} - -define void @test_fnstcw_fnstenv_fnstsw(i8* %a0) optsize { -; GENERIC-LABEL: test_fnstcw_fnstenv_fnstsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fnstcw (%eax) -; GENERIC-NEXT: fnstenv (%eax) -; GENERIC-NEXT: fnstsw (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fnstcw_fnstenv_fnstsw: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fnstcw (%eax) # sched: [8:4.00] -; ATOM-NEXT: fnstenv (%eax) # sched: [100:0.50] -; ATOM-NEXT: fnstsw (%eax) # sched: [100:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fnstcw_fnstenv_fnstsw: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fnstcw (%eax) # sched: [1:0.50] -; SLM-NEXT: fnstenv (%eax) # sched: [100:1.00] -; SLM-NEXT: fnstsw (%eax) # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fnstcw_fnstenv_fnstsw: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fnstcw (%eax) # sched: [7:1.00] -; SANDY-NEXT: fnstenv (%eax) # sched: [100:0.33] -; SANDY-NEXT: fnstsw (%eax) # sched: [7:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fnstcw_fnstenv_fnstsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fnstcw (%eax) # sched: [2:1.00] -; HASWELL-NEXT: fnstenv (%eax) # sched: [115:19.50] -; HASWELL-NEXT: fnstsw (%eax) # sched: [4:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fnstcw_fnstenv_fnstsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fnstcw (%eax) # sched: [2:1.00] -; BROADWELL-NEXT: fnstenv (%eax) # sched: [115:19.50] -; BROADWELL-NEXT: fnstsw (%eax) # sched: [4:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fnstcw_fnstenv_fnstsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fnstcw (%eax) # sched: [2:1.00] -; SKYLAKE-NEXT: fnstenv (%eax) # sched: [106:19.50] -; SKYLAKE-NEXT: fnstsw (%eax) # sched: [3:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fnstcw_fnstenv_fnstsw: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fnstcw (%eax) # sched: [2:1.00] -; SKX-NEXT: fnstenv (%eax) # sched: [106:19.50] -; SKX-NEXT: fnstsw (%eax) # sched: [3:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fnstcw_fnstenv_fnstsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fnstcw (%eax) # sched: [1:0.50] -; BDVER2-NEXT: fnstenv (%eax) # sched: [100:0.50] -; BDVER2-NEXT: fnstsw (%eax) # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fnstcw_fnstenv_fnstsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fnstcw (%eax) # sched: [1:0.50] -; BTVER2-NEXT: fnstenv (%eax) # sched: [100:0.50] -; BTVER2-NEXT: fnstsw (%eax) # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fnstcw_fnstenv_fnstsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fnstcw (%eax) # sched: [100:0.25] -; ZNVER1-NEXT: fnstenv (%eax) # sched: [100:0.25] -; ZNVER1-NEXT: fnstsw (%eax) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fnstcw $0 \0A\09 fnstenv $0 \0A\09 fnstsw $0", "*m"(i8* %a0) nounwind - ret void -} - -define void @test_fsub(float *%a0, double *%a1) optsize { -; GENERIC-LABEL: test_fsub: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fsub %st(0), %st(1) -; GENERIC-NEXT: fsub %st(2) -; GENERIC-NEXT: fsubs (%ecx) -; GENERIC-NEXT: fsubl (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fsub: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fsub %st(0), %st(1) # sched: [5:5.00] -; ATOM-NEXT: fsub %st(2) # sched: [5:5.00] -; ATOM-NEXT: fsubs (%ecx) # sched: [5:5.00] -; ATOM-NEXT: fsubl (%eax) # sched: [5:5.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fsub: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; SLM-NEXT: fsub %st(2) # sched: [3:1.00] -; SLM-NEXT: fsubs (%ecx) # sched: [6:1.00] -; SLM-NEXT: fsubl (%eax) # sched: [6:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fsub: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; SANDY-NEXT: fsub %st(2) # sched: [3:1.00] -; SANDY-NEXT: fsubs (%ecx) # sched: [10:1.00] -; SANDY-NEXT: fsubl (%eax) # sched: [10:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fsub: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; HASWELL-NEXT: fsub %st(2) # sched: [3:1.00] -; HASWELL-NEXT: fsubs (%ecx) # sched: [10:1.00] -; HASWELL-NEXT: fsubl (%eax) # sched: [10:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fsub: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; BROADWELL-NEXT: fsub %st(2) # sched: [3:1.00] -; BROADWELL-NEXT: fsubs (%ecx) # sched: [9:1.00] -; BROADWELL-NEXT: fsubl (%eax) # sched: [9:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fsub: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; SKYLAKE-NEXT: fsub %st(2) # sched: [3:1.00] -; SKYLAKE-NEXT: fsubs (%ecx) # sched: [10:1.00] -; SKYLAKE-NEXT: fsubl (%eax) # sched: [10:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fsub: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; SKX-NEXT: fsub %st(2) # sched: [3:1.00] -; SKX-NEXT: fsubs (%ecx) # sched: [10:1.00] -; SKX-NEXT: fsubl (%eax) # sched: [10:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fsub: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsub %st(0), %st(1) # sched: [5:1.00] -; BDVER2-NEXT: fsub %st(2) # sched: [5:1.00] -; BDVER2-NEXT: fsubs (%ecx) # sched: [10:1.00] -; BDVER2-NEXT: fsubl (%eax) # sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fsub: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; BTVER2-NEXT: fsub %st(2) # sched: [3:1.00] -; BTVER2-NEXT: fsubs (%ecx) # sched: [8:1.00] -; BTVER2-NEXT: fsubl (%eax) # sched: [8:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fsub: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; ZNVER1-NEXT: fsub %st(2) # sched: [3:1.00] -; ZNVER1-NEXT: fsubs (%ecx) # sched: [10:1.00] -; ZNVER1-NEXT: fsubl (%eax) # sched: [10:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fsub %st(0), %st(1) \0A\09 fsub %st(2), %st(0) \0A\09 fsubs $0 \0A\09 fsubl $1", "*m,*m"(float *%a0, double *%a1) nounwind - ret void -} - -define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize { -; GENERIC-LABEL: test_fsubp_fisub: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fsubp %st(1) -; GENERIC-NEXT: fsubp %st(2) -; GENERIC-NEXT: fisubs (%ecx) -; GENERIC-NEXT: fisubl (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fsubp_fisub: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fsubp %st(1) # sched: [5:5.00] -; ATOM-NEXT: fsubp %st(2) # sched: [5:5.00] -; ATOM-NEXT: fisubs (%ecx) # sched: [5:5.00] -; ATOM-NEXT: fisubl (%eax) # sched: [5:5.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fsubp_fisub: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fsubp %st(1) # sched: [3:1.00] -; SLM-NEXT: fsubp %st(2) # sched: [3:1.00] -; SLM-NEXT: fisubs (%ecx) # sched: [6:1.00] -; SLM-NEXT: fisubl (%eax) # sched: [6:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fsubp_fisub: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fsubp %st(1) # sched: [3:1.00] -; SANDY-NEXT: fsubp %st(2) # sched: [3:1.00] -; SANDY-NEXT: fisubs (%ecx) # sched: [13:2.00] -; SANDY-NEXT: fisubl (%eax) # sched: [13:2.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fsubp_fisub: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fsubp %st(1) # sched: [3:1.00] -; HASWELL-NEXT: fsubp %st(2) # sched: [3:1.00] -; HASWELL-NEXT: fisubs (%ecx) # sched: [13:2.00] -; HASWELL-NEXT: fisubl (%eax) # sched: [13:2.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fsubp_fisub: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fsubp %st(1) # sched: [3:1.00] -; BROADWELL-NEXT: fsubp %st(2) # sched: [3:1.00] -; BROADWELL-NEXT: fisubs (%ecx) # sched: [12:2.00] -; BROADWELL-NEXT: fisubl (%eax) # sched: [12:2.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fsubp_fisub: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fsubp %st(1) # sched: [3:1.00] -; SKYLAKE-NEXT: fsubp %st(2) # sched: [3:1.00] -; SKYLAKE-NEXT: fisubs (%ecx) # sched: [13:2.00] -; SKYLAKE-NEXT: fisubl (%eax) # sched: [13:2.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fsubp_fisub: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fsubp %st(1) # sched: [3:1.00] -; SKX-NEXT: fsubp %st(2) # sched: [3:1.00] -; SKX-NEXT: fisubs (%ecx) # sched: [13:2.00] -; SKX-NEXT: fisubl (%eax) # sched: [13:2.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fsubp_fisub: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsubp %st(1) # sched: [5:1.00] -; BDVER2-NEXT: fsubp %st(2) # sched: [5:1.00] -; BDVER2-NEXT: fisubs (%ecx) # sched: [10:1.00] -; BDVER2-NEXT: fisubl (%eax) # sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fsubp_fisub: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fsubp %st(1) # sched: [3:1.00] -; BTVER2-NEXT: fsubp %st(2) # sched: [3:1.00] -; BTVER2-NEXT: fisubs (%ecx) # sched: [8:1.00] -; BTVER2-NEXT: fisubl (%eax) # sched: [8:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fsubp_fisub: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fsubp %st(1) # sched: [3:1.00] -; ZNVER1-NEXT: fsubp %st(2) # sched: [3:1.00] -; ZNVER1-NEXT: fisubs (%ecx) # sched: [10:1.00] -; ZNVER1-NEXT: fisubl (%eax) # sched: [10:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fsubp \0A\09 fsubp %st(2), %st(0) \0A\09 fisubs $0 \0A\09 fisubl $1", "*m,*m"(i16 *%a0, i32 *%a1) nounwind - ret void -} - -define void @test_fsubr(float *%a0, double *%a1) optsize { -; GENERIC-LABEL: test_fsubr: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fsubr %st(0), %st(1) -; GENERIC-NEXT: fsubr %st(2) -; GENERIC-NEXT: fsubrs (%ecx) -; GENERIC-NEXT: fsubrl (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fsubr: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fsubr %st(0), %st(1) # sched: [5:5.00] -; ATOM-NEXT: fsubr %st(2) # sched: [5:5.00] -; ATOM-NEXT: fsubrs (%ecx) # sched: [5:5.00] -; ATOM-NEXT: fsubrl (%eax) # sched: [5:5.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fsubr: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; SLM-NEXT: fsubr %st(2) # sched: [3:1.00] -; SLM-NEXT: fsubrs (%ecx) # sched: [6:1.00] -; SLM-NEXT: fsubrl (%eax) # sched: [6:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fsubr: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; SANDY-NEXT: fsubr %st(2) # sched: [3:1.00] -; SANDY-NEXT: fsubrs (%ecx) # sched: [10:1.00] -; SANDY-NEXT: fsubrl (%eax) # sched: [10:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fsubr: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; HASWELL-NEXT: fsubr %st(2) # sched: [3:1.00] -; HASWELL-NEXT: fsubrs (%ecx) # sched: [10:1.00] -; HASWELL-NEXT: fsubrl (%eax) # sched: [10:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fsubr: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; BROADWELL-NEXT: fsubr %st(2) # sched: [3:1.00] -; BROADWELL-NEXT: fsubrs (%ecx) # sched: [9:1.00] -; BROADWELL-NEXT: fsubrl (%eax) # sched: [9:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fsubr: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; SKYLAKE-NEXT: fsubr %st(2) # sched: [3:1.00] -; SKYLAKE-NEXT: fsubrs (%ecx) # sched: [10:1.00] -; SKYLAKE-NEXT: fsubrl (%eax) # sched: [10:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fsubr: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; SKX-NEXT: fsubr %st(2) # sched: [3:1.00] -; SKX-NEXT: fsubrs (%ecx) # sched: [10:1.00] -; SKX-NEXT: fsubrl (%eax) # sched: [10:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fsubr: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsubr %st(0), %st(1) # sched: [5:1.00] -; BDVER2-NEXT: fsubr %st(2) # sched: [5:1.00] -; BDVER2-NEXT: fsubrs (%ecx) # sched: [10:1.00] -; BDVER2-NEXT: fsubrl (%eax) # sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fsubr: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; BTVER2-NEXT: fsubr %st(2) # sched: [3:1.00] -; BTVER2-NEXT: fsubrs (%ecx) # sched: [8:1.00] -; BTVER2-NEXT: fsubrl (%eax) # sched: [8:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fsubr: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; ZNVER1-NEXT: fsubr %st(2) # sched: [3:1.00] -; ZNVER1-NEXT: fsubrs (%ecx) # sched: [10:1.00] -; ZNVER1-NEXT: fsubrl (%eax) # sched: [10:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fsubr %st(0), %st(1) \0A\09 fsubr %st(2), %st(0) \0A\09 fsubrs $0 \0A\09 fsubrl $1", "*m,*m"(float *%a0, double *%a1) nounwind - ret void -} - -define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize { -; GENERIC-LABEL: test_fsubrp_fisubr: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fsubrp %st(1) -; GENERIC-NEXT: fsubrp %st(2) -; GENERIC-NEXT: fisubrs (%ecx) -; GENERIC-NEXT: fisubrl (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fsubrp_fisubr: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fsubrp %st(1) # sched: [5:5.00] -; ATOM-NEXT: fsubrp %st(2) # sched: [5:5.00] -; ATOM-NEXT: fisubrs (%ecx) # sched: [5:5.00] -; ATOM-NEXT: fisubrl (%eax) # sched: [5:5.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fsubrp_fisubr: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fsubrp %st(1) # sched: [3:1.00] -; SLM-NEXT: fsubrp %st(2) # sched: [3:1.00] -; SLM-NEXT: fisubrs (%ecx) # sched: [6:1.00] -; SLM-NEXT: fisubrl (%eax) # sched: [6:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fsubrp_fisubr: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fsubrp %st(1) # sched: [3:1.00] -; SANDY-NEXT: fsubrp %st(2) # sched: [3:1.00] -; SANDY-NEXT: fisubrs (%ecx) # sched: [13:2.00] -; SANDY-NEXT: fisubrl (%eax) # sched: [13:2.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fsubrp_fisubr: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fsubrp %st(1) # sched: [3:1.00] -; HASWELL-NEXT: fsubrp %st(2) # sched: [3:1.00] -; HASWELL-NEXT: fisubrs (%ecx) # sched: [13:2.00] -; HASWELL-NEXT: fisubrl (%eax) # sched: [13:2.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fsubrp_fisubr: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fsubrp %st(1) # sched: [3:1.00] -; BROADWELL-NEXT: fsubrp %st(2) # sched: [3:1.00] -; BROADWELL-NEXT: fisubrs (%ecx) # sched: [12:2.00] -; BROADWELL-NEXT: fisubrl (%eax) # sched: [12:2.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fsubrp_fisubr: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fsubrp %st(1) # sched: [3:1.00] -; SKYLAKE-NEXT: fsubrp %st(2) # sched: [3:1.00] -; SKYLAKE-NEXT: fisubrs (%ecx) # sched: [13:2.00] -; SKYLAKE-NEXT: fisubrl (%eax) # sched: [13:2.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fsubrp_fisubr: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fsubrp %st(1) # sched: [3:1.00] -; SKX-NEXT: fsubrp %st(2) # sched: [3:1.00] -; SKX-NEXT: fisubrs (%ecx) # sched: [13:2.00] -; SKX-NEXT: fisubrl (%eax) # sched: [13:2.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fsubrp_fisubr: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsubrp %st(1) # sched: [5:1.00] -; BDVER2-NEXT: fsubrp %st(2) # sched: [5:1.00] -; BDVER2-NEXT: fisubrs (%ecx) # sched: [10:1.00] -; BDVER2-NEXT: fisubrl (%eax) # sched: [10:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fsubrp_fisubr: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fsubrp %st(1) # sched: [3:1.00] -; BTVER2-NEXT: fsubrp %st(2) # sched: [3:1.00] -; BTVER2-NEXT: fisubrs (%ecx) # sched: [8:1.00] -; BTVER2-NEXT: fisubrl (%eax) # sched: [8:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fsubrp_fisubr: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fsubrp %st(1) # sched: [3:1.00] -; ZNVER1-NEXT: fsubrp %st(2) # sched: [3:1.00] -; ZNVER1-NEXT: fisubrs (%ecx) # sched: [10:1.00] -; ZNVER1-NEXT: fisubrl (%eax) # sched: [10:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fsubrp \0A\09 fsubrp %st(2), %st(0) \0A\09 fisubrs $0 \0A\09 fisubrl $1", "*m,*m"(i16 *%a0, i32 *%a1) nounwind - ret void -} - -define void @test_ftst() optsize { -; GENERIC-LABEL: test_ftst: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: ftst -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_ftst: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: ftst # sched: [9:4.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_ftst: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: ftst # sched: [3:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_ftst: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: ftst # sched: [3:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_ftst: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: ftst # sched: [1:1.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ftst: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: ftst # sched: [3:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_ftst: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: ftst # sched: [2:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_ftst: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: ftst # sched: [2:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_ftst: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: ftst # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_ftst: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: ftst # sched: [3:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ftst: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: ftst # sched: [1:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "ftst", ""() nounwind - ret void -} - -define void @test_fucom_fucomp_fucompp() optsize { -; GENERIC-LABEL: test_fucom_fucomp_fucompp: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fucom %st(1) -; GENERIC-NEXT: fucom %st(3) -; GENERIC-NEXT: fucomp %st(1) -; GENERIC-NEXT: fucomp %st(3) -; GENERIC-NEXT: fucompp -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fucom_fucomp_fucompp: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fucom %st(1) # sched: [1:1.00] -; ATOM-NEXT: fucom %st(3) # sched: [1:1.00] -; ATOM-NEXT: fucomp %st(1) # sched: [1:1.00] -; ATOM-NEXT: fucomp %st(3) # sched: [1:1.00] -; ATOM-NEXT: fucompp # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fucom_fucomp_fucompp: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fucom %st(1) # sched: [3:1.00] -; SLM-NEXT: fucom %st(3) # sched: [3:1.00] -; SLM-NEXT: fucomp %st(1) # sched: [3:1.00] -; SLM-NEXT: fucomp %st(3) # sched: [3:1.00] -; SLM-NEXT: fucompp # sched: [3:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fucom_fucomp_fucompp: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fucom %st(1) # sched: [1:1.00] -; SANDY-NEXT: fucom %st(3) # sched: [1:1.00] -; SANDY-NEXT: fucomp %st(1) # sched: [1:1.00] -; SANDY-NEXT: fucomp %st(3) # sched: [1:1.00] -; SANDY-NEXT: fucompp # sched: [3:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fucom_fucomp_fucompp: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fucom %st(1) # sched: [1:1.00] -; HASWELL-NEXT: fucom %st(3) # sched: [1:1.00] -; HASWELL-NEXT: fucomp %st(1) # sched: [1:1.00] -; HASWELL-NEXT: fucomp %st(3) # sched: [1:1.00] -; HASWELL-NEXT: fucompp # sched: [1:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fucom_fucomp_fucompp: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fucom %st(1) # sched: [1:1.00] -; BROADWELL-NEXT: fucom %st(3) # sched: [1:1.00] -; BROADWELL-NEXT: fucomp %st(1) # sched: [1:1.00] -; BROADWELL-NEXT: fucomp %st(3) # sched: [1:1.00] -; BROADWELL-NEXT: fucompp # sched: [3:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fucom_fucomp_fucompp: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fucom %st(1) # sched: [1:1.00] -; SKYLAKE-NEXT: fucom %st(3) # sched: [1:1.00] -; SKYLAKE-NEXT: fucomp %st(1) # sched: [1:1.00] -; SKYLAKE-NEXT: fucomp %st(3) # sched: [1:1.00] -; SKYLAKE-NEXT: fucompp # sched: [2:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fucom_fucomp_fucompp: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fucom %st(1) # sched: [1:1.00] -; SKX-NEXT: fucom %st(3) # sched: [1:1.00] -; SKX-NEXT: fucomp %st(1) # sched: [1:1.00] -; SKX-NEXT: fucomp %st(3) # sched: [1:1.00] -; SKX-NEXT: fucompp # sched: [2:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fucom_fucomp_fucompp: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fucom %st(1) # sched: [1:1.00] -; BDVER2-NEXT: fucom %st(3) # sched: [1:1.00] -; BDVER2-NEXT: fucomp %st(1) # sched: [1:1.00] -; BDVER2-NEXT: fucomp %st(3) # sched: [1:1.00] -; BDVER2-NEXT: fucompp # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fucom_fucomp_fucompp: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fucom %st(1) # sched: [3:1.00] -; BTVER2-NEXT: fucom %st(3) # sched: [3:1.00] -; BTVER2-NEXT: fucomp %st(1) # sched: [3:1.00] -; BTVER2-NEXT: fucomp %st(3) # sched: [3:1.00] -; BTVER2-NEXT: fucompp # sched: [3:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fucom_fucomp_fucompp: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fucom %st(1) # sched: [1:1.00] -; ZNVER1-NEXT: fucom %st(3) # sched: [1:1.00] -; ZNVER1-NEXT: fucomp %st(1) # sched: [1:1.00] -; ZNVER1-NEXT: fucomp %st(3) # sched: [1:1.00] -; ZNVER1-NEXT: fucompp # sched: [1:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fucom \0A\09 fucom %st(3) \0A\09 fucomp \0A\09 fucomp %st(3) \0A\09 fucompp", ""() nounwind - ret void -} - -define void @test_fucomi_fucomip() optsize { -; GENERIC-LABEL: test_fucomi_fucomip: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fucomi %st(3) -; GENERIC-NEXT: fucompi %st(3) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fucomi_fucomip: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fucomi %st(3) # sched: [9:4.50] -; ATOM-NEXT: fucompi %st(3) # sched: [9:4.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fucomi_fucomip: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fucomi %st(3) # sched: [3:1.00] -; SLM-NEXT: fucompi %st(3) # sched: [3:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fucomi_fucomip: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fucomi %st(3) # sched: [3:1.00] -; SANDY-NEXT: fucompi %st(3) # sched: [3:1.00] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fucomi_fucomip: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fucomi %st(3) # sched: [1:0.50] -; HASWELL-NEXT: fucompi %st(3) # sched: [1:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fucomi_fucomip: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fucomi %st(3) # sched: [3:1.00] -; BROADWELL-NEXT: fucompi %st(3) # sched: [3:1.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fucomi_fucomip: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fucomi %st(3) # sched: [2:1.00] -; SKYLAKE-NEXT: fucompi %st(3) # sched: [2:1.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fucomi_fucomip: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fucomi %st(3) # sched: [2:1.00] -; SKX-NEXT: fucompi %st(3) # sched: [2:1.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fucomi_fucomip: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fucomi %st(3) # sched: [1:1.00] -; BDVER2-NEXT: fucompi %st(3) # sched: [1:1.00] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fucomi_fucomip: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fucomi %st(3) # sched: [3:1.00] -; BTVER2-NEXT: fucompi %st(3) # sched: [3:1.00] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fucomi_fucomip: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fucomi %st(3) # sched: [9:0.50] -; ZNVER1-NEXT: fucompi %st(3) # sched: [9:0.50] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fucomi %st(3) \0A\09 fucomip %st(3)", ""() nounwind - ret void -} - -define void @test_fwait() optsize { -; GENERIC-LABEL: test_fwait: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: wait -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fwait: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: wait # sched: [1:0.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fwait: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: wait # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fwait: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: wait # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fwait: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: wait # sched: [2:0.50] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fwait: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: wait # sched: [2:0.50] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fwait: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: wait # sched: [2:0.50] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fwait: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: wait # sched: [2:0.50] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fwait: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: wait # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fwait: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: wait # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fwait: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: wait # sched: [1:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fwait", ""() nounwind - ret void -} - -define void @test_fxam() optsize { -; GENERIC-LABEL: test_fxam: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fxam -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fxam: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fxam # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fxam: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fxam # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fxam: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fxam # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fxam: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fxam # sched: [1:2.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fxam: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fxam # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fxam: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fxam # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fxam: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fxam # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fxam: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fxam # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fxam: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fxam # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fxam: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fxam # sched: [1:1.00] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fxam", ""() nounwind - ret void -} - -define void @test_fxch() optsize { -; GENERIC-LABEL: test_fxch: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fxch %st(1) -; GENERIC-NEXT: fxch %st(3) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fxch: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fxch %st(1) # sched: [1:1.00] -; ATOM-NEXT: fxch %st(3) # sched: [1:1.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fxch: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fxch %st(1) # sched: [1:0.50] -; SLM-NEXT: fxch %st(3) # sched: [1:0.50] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fxch: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fxch %st(1) # sched: [1:0.33] -; SANDY-NEXT: fxch %st(3) # sched: [1:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fxch: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fxch %st(1) # sched: [17:4.00] -; HASWELL-NEXT: fxch %st(3) # sched: [17:4.00] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fxch: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fxch %st(1) # sched: [14:4.00] -; BROADWELL-NEXT: fxch %st(3) # sched: [14:4.00] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fxch: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fxch %st(1) # sched: [17:4.00] -; SKYLAKE-NEXT: fxch %st(3) # sched: [17:4.00] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fxch: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fxch %st(1) # sched: [17:4.00] -; SKX-NEXT: fxch %st(3) # sched: [17:4.00] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fxch: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fxch %st(1) # sched: [1:0.50] -; BDVER2-NEXT: fxch %st(3) # sched: [1:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fxch: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fxch %st(1) # sched: [1:0.50] -; BTVER2-NEXT: fxch %st(3) # sched: [1:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fxch: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fxch %st(1) # sched: [1:0.25] -; ZNVER1-NEXT: fxch %st(3) # sched: [1:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fxch \0A\09 fxch %st(3)", ""() nounwind - ret void -} - -define void @test_fxrstor_fxsave(i8* %a0) optsize { -; GENERIC-LABEL: test_fxrstor_fxsave: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fxrstor (%eax) -; GENERIC-NEXT: fxsave (%eax) -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fxrstor_fxsave: -; ATOM: # %bb.0: -; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] -; ATOM-NEXT: #APP -; ATOM-NEXT: fxrstor (%eax) # sched: [141:70.50] -; ATOM-NEXT: fxsave (%eax) # sched: [140:70.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fxrstor_fxsave: -; SLM: # %bb.0: -; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; SLM-NEXT: #APP -; SLM-NEXT: fxrstor (%eax) # sched: [100:1.00] -; SLM-NEXT: fxsave (%eax) # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fxrstor_fxsave: -; SANDY: # %bb.0: -; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SANDY-NEXT: #APP -; SANDY-NEXT: fxrstor (%eax) # sched: [5:2.00] -; SANDY-NEXT: fxsave (%eax) # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fxrstor_fxsave: -; HASWELL: # %bb.0: -; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fxrstor (%eax) # sched: [64:16.50] -; HASWELL-NEXT: fxsave (%eax) # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fxrstor_fxsave: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fxrstor (%eax) # sched: [63:16.50] -; BROADWELL-NEXT: fxsave (%eax) # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fxrstor_fxsave: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fxrstor (%eax) # sched: [63:16.50] -; SKYLAKE-NEXT: fxsave (%eax) # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fxrstor_fxsave: -; SKX: # %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; SKX-NEXT: #APP -; SKX-NEXT: fxrstor (%eax) # sched: [63:16.50] -; SKX-NEXT: fxsave (%eax) # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fxrstor_fxsave: -; BDVER2: # %bb.0: -; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fxrstor (%eax) # sched: [100:0.50] -; BDVER2-NEXT: fxsave (%eax) # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fxrstor_fxsave: -; BTVER2: # %bb.0: -; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00] -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fxrstor (%eax) # sched: [100:0.50] -; BTVER2-NEXT: fxsave (%eax) # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fxrstor_fxsave: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50] -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fxrstor (%eax) # sched: [100:0.25] -; ZNVER1-NEXT: fxsave (%eax) # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fxrstor $0 \0A\09 fxsave $0", "*m"(i8 *%a0) nounwind - ret void -} - -define void @test_fxtract() optsize { -; GENERIC-LABEL: test_fxtract: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fxtract -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fxtract: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fxtract # sched: [25:12.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fxtract: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fxtract # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fxtract: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fxtract # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fxtract: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fxtract # sched: [15:4.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fxtract: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fxtract # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fxtract: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fxtract # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fxtract: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fxtract # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fxtract: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fxtract # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fxtract: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fxtract # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fxtract: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fxtract # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fxtract", ""() nounwind - ret void -} - -define void @test_fyl2x() optsize { -; GENERIC-LABEL: test_fyl2x: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fyl2x -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fyl2x: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fyl2x # sched: [146:73.00] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fyl2x: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fyl2x # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fyl2x: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fyl2x # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fyl2x: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fyl2x # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fyl2x: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fyl2x # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fyl2x: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fyl2x # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fyl2x: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fyl2x # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fyl2x: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fyl2x # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fyl2x: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fyl2x # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fyl2x: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fyl2x # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fyl2x", ""() nounwind - ret void -} - -define void @test_fyl2xp1() optsize { -; GENERIC-LABEL: test_fyl2xp1: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: fyl2xp1 -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retl -; -; ATOM-LABEL: test_fyl2xp1: -; ATOM: # %bb.0: -; ATOM-NEXT: #APP -; ATOM-NEXT: fyl2xp1 # sched: [147:73.50] -; ATOM-NEXT: #NO_APP -; ATOM-NEXT: retl # sched: [79:39.50] -; -; SLM-LABEL: test_fyl2xp1: -; SLM: # %bb.0: -; SLM-NEXT: #APP -; SLM-NEXT: fyl2xp1 # sched: [100:1.00] -; SLM-NEXT: #NO_APP -; SLM-NEXT: retl # sched: [4:1.00] -; -; SANDY-LABEL: test_fyl2xp1: -; SANDY: # %bb.0: -; SANDY-NEXT: #APP -; SANDY-NEXT: fyl2xp1 # sched: [100:0.33] -; SANDY-NEXT: #NO_APP -; SANDY-NEXT: retl # sched: [6:1.00] -; -; HASWELL-LABEL: test_fyl2xp1: -; HASWELL: # %bb.0: -; HASWELL-NEXT: #APP -; HASWELL-NEXT: fyl2xp1 # sched: [100:0.25] -; HASWELL-NEXT: #NO_APP -; HASWELL-NEXT: retl # sched: [7:1.00] -; -; BROADWELL-LABEL: test_fyl2xp1: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fyl2xp1 # sched: [100:0.25] -; BROADWELL-NEXT: #NO_APP -; BROADWELL-NEXT: retl # sched: [6:0.50] -; -; SKYLAKE-LABEL: test_fyl2xp1: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fyl2xp1 # sched: [100:0.25] -; SKYLAKE-NEXT: #NO_APP -; SKYLAKE-NEXT: retl # sched: [6:0.50] -; -; SKX-LABEL: test_fyl2xp1: -; SKX: # %bb.0: -; SKX-NEXT: #APP -; SKX-NEXT: fyl2xp1 # sched: [100:0.25] -; SKX-NEXT: #NO_APP -; SKX-NEXT: retl # sched: [6:0.50] -; -; BDVER2-LABEL: test_fyl2xp1: -; BDVER2: # %bb.0: -; BDVER2-NEXT: #APP -; BDVER2-NEXT: fyl2xp1 # sched: [100:0.50] -; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [5:1.00] -; -; BTVER2-LABEL: test_fyl2xp1: -; BTVER2: # %bb.0: -; BTVER2-NEXT: #APP -; BTVER2-NEXT: fyl2xp1 # sched: [100:0.50] -; BTVER2-NEXT: #NO_APP -; BTVER2-NEXT: retl # sched: [4:1.00] -; -; ZNVER1-LABEL: test_fyl2xp1: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: #APP -; ZNVER1-NEXT: fyl2xp1 # sched: [100:0.25] -; ZNVER1-NEXT: #NO_APP -; ZNVER1-NEXT: retl # sched: [1:0.50] - tail call void asm sideeffect "fyl2xp1", ""() nounwind - ret void -} Index: test/CodeGen/X86/xop-schedule.ll =================================================================== --- test/CodeGen/X86/xop-schedule.ll +++ test/CodeGen/X86/xop-schedule.ll @@ -1,1818 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+xop | FileCheck %s --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4 - -define void @test_vfrczpd(<2 x double> %a0, <4 x double> %a1, <2 x double> *%a2, <4 x double> *%a3) { -; GENERIC-LABEL: test_vfrczpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfrczpd %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vfrczpd %ymm1, %ymm1 # sched: [3:1.00] -; GENERIC-NEXT: vfrczpd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: vfrczpd (%rsi), %ymm1 # sched: [10:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfrczpd: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfrczpd %xmm0, %xmm0 # sched: [10:1.00] -; BDVER12-NEXT: vfrczpd %ymm1, %ymm1 # sched: [10:2.00] -; BDVER12-NEXT: vfrczpd (%rdi), %xmm0 # sched: [15:1.00] -; BDVER12-NEXT: vfrczpd (%rsi), %ymm1 # sched: [15:2.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vfrczpd: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vfrczpd %xmm0, %xmm0 -; BDVER3-NEXT: vfrczpd %ymm1, %ymm1 -; BDVER3-NEXT: vfrczpd (%rdi), %xmm0 -; BDVER3-NEXT: vfrczpd (%rsi), %ymm1 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: vzeroupper -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vfrczpd: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vfrczpd %xmm0, %xmm0 -; BDVER4-NEXT: vfrczpd %ymm1, %ymm1 -; BDVER4-NEXT: vfrczpd (%rdi), %xmm0 -; BDVER4-NEXT: vfrczpd (%rsi), %ymm1 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: vzeroupper -; BDVER4-NEXT: retq - call void asm sideeffect "vfrczpd $0, $0 \0a\09 vfrczpd $1, $1 \0a\09 vfrczpd $2, $0 \0a\09 vfrczpd $3, $1", "x,x,*m,*m"(<2 x double> %a0, <4 x double> %a1, <2 x double> *%a2, <4 x double> *%a3) - ret void -} - -define void @test_vfrczps(<4 x float> %a0, <4 x double> %a1, <4 x float> *%a2, <4 x double> *%a3) { -; GENERIC-LABEL: test_vfrczps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfrczps %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vfrczps %ymm1, %ymm1 # sched: [3:1.00] -; GENERIC-NEXT: vfrczps (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: vfrczps (%rsi), %ymm1 # sched: [10:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfrczps: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfrczps %xmm0, %xmm0 # sched: [10:1.00] -; BDVER12-NEXT: vfrczps %ymm1, %ymm1 # sched: [10:2.00] -; BDVER12-NEXT: vfrczps (%rdi), %xmm0 # sched: [15:1.00] -; BDVER12-NEXT: vfrczps (%rsi), %ymm1 # sched: [15:2.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vfrczps: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vfrczps %xmm0, %xmm0 -; BDVER3-NEXT: vfrczps %ymm1, %ymm1 -; BDVER3-NEXT: vfrczps (%rdi), %xmm0 -; BDVER3-NEXT: vfrczps (%rsi), %ymm1 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: vzeroupper -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vfrczps: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vfrczps %xmm0, %xmm0 -; BDVER4-NEXT: vfrczps %ymm1, %ymm1 -; BDVER4-NEXT: vfrczps (%rdi), %xmm0 -; BDVER4-NEXT: vfrczps (%rsi), %ymm1 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: vzeroupper -; BDVER4-NEXT: retq - call void asm sideeffect "vfrczps $0, $0 \0a\09 vfrczps $1, $1 \0a\09 vfrczps $2, $0 \0a\09 vfrczps $3, $1", "x,x,*m,*m"(<4 x float> %a0, <4 x double> %a1, <4 x float> *%a2, <4 x double> *%a3) - ret void -} - -define void @test_vfrczsd(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_vfrczsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfrczsd %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vfrczsd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfrczsd: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfrczsd %xmm0, %xmm0 # sched: [10:1.00] -; BDVER12-NEXT: vfrczsd (%rdi), %xmm0 # sched: [15:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vfrczsd: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vfrczsd %xmm0, %xmm0 -; BDVER3-NEXT: vfrczsd (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vfrczsd: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vfrczsd %xmm0, %xmm0 -; BDVER4-NEXT: vfrczsd (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vfrczsd $0, $0 \0a\09 vfrczsd $1, $0", "x,*m"(<2 x double> %a0, <2 x double> *%a1) - ret void -} - -define void @test_vfrczss(<4 x float> %a0, <4 x double> *%a1) { -; GENERIC-LABEL: test_vfrczss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vfrczss %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vfrczss (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vfrczss: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfrczss %xmm0, %xmm0 # sched: [10:1.00] -; BDVER12-NEXT: vfrczss (%rdi), %xmm0 # sched: [15:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vfrczss: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vfrczss %xmm0, %xmm0 -; BDVER3-NEXT: vfrczss (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vfrczss: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vfrczss %xmm0, %xmm0 -; BDVER4-NEXT: vfrczss (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vfrczss $0, $0 \0a\09 vfrczss $1, $0", "x,*m"(<4 x float> %a0, <4 x double> *%a1) - ret void -} - -define void @test_vpcmov_128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) { -; GENERIC-LABEL: test_vpcmov_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpcmov_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpcmov_128: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpcmov_128: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpcmov $2, $1, $0, $0 \0a\09 vpcmov $3, $1, $0, $0 \0a\09 vpcmov $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) - ret void -} - -define void @test_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, <4 x i64> *%a3) { -; GENERIC-LABEL: test_vpcmov_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpcmov_256: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.50] -; BDVER12-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0 # sched: [7:1.00] -; BDVER12-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpcmov_256: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER3-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER3-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: vzeroupper -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpcmov_256: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER4-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER4-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: vzeroupper -; BDVER4-NEXT: retq - call void asm sideeffect "vpcmov $2, $1, $0, $0 \0a\09 vpcmov $3, $1, $0, $0 \0a\09 vpcmov $2, $3, $0, $0", "x,x,x,*m"(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, <4 x i64> *%a3) - ret void -} - -define void @test_vpcom(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_vpcom: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpcom: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpcom: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpcom: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpcomb $3, $1, $0, $0 \0a\09 vpcomd $3, $1, $0, $0 \0a\09 vpcomq $3, $1, $0, $0 \0a\09 vpcomw $3, $1, $0, $0 \0a\09 vpcomb $3, $2, $0, $0 \0a\09 vpcomd $3, $2, $0, $0 \0a\09 vpcomq $3, $2, $0, $0 \0a\09 vpcomw $3, $2, $0, $0", "x,x,*m,i"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2, i8 3) - ret void -} - -define void @test_vpcomu(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_vpcomu: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpcomu: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpcomu: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpcomu: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpcomub $3, $1, $0, $0 \0a\09 vpcomud $3, $1, $0, $0 \0a\09 vpcomuq $3, $1, $0, $0 \0a\09 vpcomuw $3, $1, $0, $0 \0a\09 vpcomub $3, $2, $0, $0 \0a\09 vpcomud $3, $2, $0, $0 \0a\09 vpcomuq $3, $2, $0, $0 \0a\09 vpcomuw $3, $2, $0, $0", "x,x,*m,i"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2, i8 3) - ret void -} - -define void @test_vpermil2pd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) { -; GENERIC-LABEL: test_vpermil2pd_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpermil2pd_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BDVER12-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BDVER12-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [8:2.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpermil2pd_128: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpermil2pd_128: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpermil2pd $4, $2, $1, $0, $0 \0a\09 vpermil2pd $4, $2, $3, $0, $0 \0a\09 vpermil2pd $4, $3, $1, $0, $0", "x,x,x,*m,i"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3, i8 3) - ret void -} - -define void @test_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) { -; GENERIC-LABEL: test_vpermil2pd_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpermil2pd_256: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00] -; BDVER12-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00] -; BDVER12-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:3.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpermil2pd_256: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER3-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 -; BDVER3-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: vzeroupper -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpermil2pd_256: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER4-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 -; BDVER4-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: vzeroupper -; BDVER4-NEXT: retq - call void asm sideeffect "vpermil2pd $4, $2, $1, $0, $0 \0a\09 vpermil2pd $4, $2, $3, $0, $0 \0a\09 vpermil2pd $4, $3, $1, $0, $0", "x,x,x,*m,i"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3, i8 3) - ret void -} - -define void @test_vpermil2ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) { -; GENERIC-LABEL: test_vpermil2ps_128: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpermil2ps_128: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BDVER12-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BDVER12-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [8:2.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpermil2ps_128: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpermil2ps_128: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpermil2ps $4, $2, $1, $0, $0 \0a\09 vpermil2ps $4, $2, $3, $0, $0 \0a\09 vpermil2ps $4, $3, $1, $0, $0", "x,x,x,*m,i"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3, i8 3) - ret void -} - -define void @test_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) { -; GENERIC-LABEL: test_vpermil2ps_256: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; GENERIC-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: vzeroupper # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpermil2ps_256: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00] -; BDVER12-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00] -; BDVER12-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:3.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [46:4.00] -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpermil2ps_256: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER3-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 -; BDVER3-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: vzeroupper -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpermil2ps_256: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 -; BDVER4-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 -; BDVER4-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: vzeroupper -; BDVER4-NEXT: retq - call void asm sideeffect "vpermil2ps $4, $2, $1, $0, $0 \0a\09 vpermil2ps $4, $2, $3, $0, $0 \0a\09 vpermil2ps $4, $3, $1, $0, $0", "x,x,x,*m,i"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3, i8 3) - ret void -} - -define void @test_vphaddbd(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_vphaddbd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vphaddbd %xmm0, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: vphaddbd (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vphaddbd: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddbd %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vphaddbd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vphaddbd: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vphaddbd %xmm0, %xmm0 -; BDVER3-NEXT: vphaddbd (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vphaddbd: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vphaddbd %xmm0, %xmm0 -; BDVER4-NEXT: vphaddbd (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vphaddbd $0, $0 \0a\09 vphaddbd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1) - ret void -} - -define void @test_vphaddbq(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_vphaddbq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vphaddbq %xmm0, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: vphaddbq (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vphaddbq: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddbq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vphaddbq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vphaddbq: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vphaddbq %xmm0, %xmm0 -; BDVER3-NEXT: vphaddbq (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vphaddbq: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vphaddbq %xmm0, %xmm0 -; BDVER4-NEXT: vphaddbq (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vphaddbq $0, $0 \0a\09 vphaddbq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1) - ret void -} - -define void @test_vphaddbw(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_vphaddbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vphaddbw %xmm0, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: vphaddbw (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vphaddbw: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddbw %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vphaddbw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vphaddbw: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vphaddbw %xmm0, %xmm0 -; BDVER3-NEXT: vphaddbw (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vphaddbw: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vphaddbw %xmm0, %xmm0 -; BDVER4-NEXT: vphaddbw (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vphaddbw $0, $0 \0a\09 vphaddbw $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1) - ret void -} - -define void @test_vphadddq(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_vphadddq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vphadddq %xmm0, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: vphadddq (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vphadddq: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphadddq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vphadddq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vphadddq: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vphadddq %xmm0, %xmm0 -; BDVER3-NEXT: vphadddq (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vphadddq: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vphadddq %xmm0, %xmm0 -; BDVER4-NEXT: vphadddq (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vphadddq $0, $0 \0a\09 vphadddq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1) - ret void -} - -define void @test_vphaddubd(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_vphaddubd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vphaddubd %xmm0, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: vphaddubd (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vphaddubd: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddubd %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vphaddubd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vphaddubd: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vphaddubd %xmm0, %xmm0 -; BDVER3-NEXT: vphaddubd (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vphaddubd: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vphaddubd %xmm0, %xmm0 -; BDVER4-NEXT: vphaddubd (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vphaddubd $0, $0 \0a\09 vphaddubd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1) - ret void -} - -define void @test_vphaddubq(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_vphaddubq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vphaddubq %xmm0, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: vphaddubq (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vphaddubq: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddubq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vphaddubq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vphaddubq: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vphaddubq %xmm0, %xmm0 -; BDVER3-NEXT: vphaddubq (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vphaddubq: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vphaddubq %xmm0, %xmm0 -; BDVER4-NEXT: vphaddubq (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vphaddubq $0, $0 \0a\09 vphaddubq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1) - ret void -} - -define void @test_vphaddubw(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_vphaddubw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vphaddubw %xmm0, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: vphaddubw (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vphaddubw: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddubw %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vphaddubw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vphaddubw: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vphaddubw %xmm0, %xmm0 -; BDVER3-NEXT: vphaddubw (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vphaddubw: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vphaddubw %xmm0, %xmm0 -; BDVER4-NEXT: vphaddubw (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vphaddubw $0, $0 \0a\09 vphaddubw $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1) - ret void -} - -define void @test_vphaddudq(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_vphaddudq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vphaddudq %xmm0, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: vphaddudq (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vphaddudq: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddudq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vphaddudq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vphaddudq: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vphaddudq %xmm0, %xmm0 -; BDVER3-NEXT: vphaddudq (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vphaddudq: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vphaddudq %xmm0, %xmm0 -; BDVER4-NEXT: vphaddudq (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vphaddudq $0, $0 \0a\09 vphaddudq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1) - ret void -} - -define void @test_vphadduwd(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_vphadduwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vphadduwd %xmm0, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: vphadduwd (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vphadduwd: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphadduwd %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vphadduwd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vphadduwd: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vphadduwd %xmm0, %xmm0 -; BDVER3-NEXT: vphadduwd (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vphadduwd: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vphadduwd %xmm0, %xmm0 -; BDVER4-NEXT: vphadduwd (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vphadduwd $0, $0 \0a\09 vphadduwd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1) - ret void -} - -define void @test_vphadduwq(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_vphadduwq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vphadduwq %xmm0, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: vphadduwq (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vphadduwq: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphadduwq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vphadduwq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vphadduwq: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vphadduwq %xmm0, %xmm0 -; BDVER3-NEXT: vphadduwq (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vphadduwq: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vphadduwq %xmm0, %xmm0 -; BDVER4-NEXT: vphadduwq (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vphadduwq $0, $0 \0a\09 vphadduwq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1) - ret void -} - -define void @test_vphaddwd(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_vphaddwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vphaddwd %xmm0, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: vphaddwd (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vphaddwd: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddwd %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vphaddwd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vphaddwd: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vphaddwd %xmm0, %xmm0 -; BDVER3-NEXT: vphaddwd (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vphaddwd: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vphaddwd %xmm0, %xmm0 -; BDVER4-NEXT: vphaddwd (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vphaddwd $0, $0 \0a\09 vphaddwd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1) - ret void -} - -define void @test_vphaddwq(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_vphaddwq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vphaddwq %xmm0, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: vphaddwq (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vphaddwq: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddwq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vphaddwq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vphaddwq: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vphaddwq %xmm0, %xmm0 -; BDVER3-NEXT: vphaddwq (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vphaddwq: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vphaddwq %xmm0, %xmm0 -; BDVER4-NEXT: vphaddwq (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vphaddwq $0, $0 \0a\09 vphaddwq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1) - ret void -} - -define void @test_vphsubbw(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_vphsubbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vphsubbw %xmm0, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: vphsubbw (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vphsubbw: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphsubbw %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vphsubbw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vphsubbw: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vphsubbw %xmm0, %xmm0 -; BDVER3-NEXT: vphsubbw (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vphsubbw: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vphsubbw %xmm0, %xmm0 -; BDVER4-NEXT: vphsubbw (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vphsubbw $0, $0 \0a\09 vphsubbw $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1) - ret void -} - -define void @test_vphsubdq(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_vphsubdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vphsubdq %xmm0, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: vphsubdq (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vphsubdq: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphsubdq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vphsubdq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vphsubdq: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vphsubdq %xmm0, %xmm0 -; BDVER3-NEXT: vphsubdq (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vphsubdq: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vphsubdq %xmm0, %xmm0 -; BDVER4-NEXT: vphsubdq (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vphsubdq $0, $0 \0a\09 vphsubdq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1) - ret void -} - -define void @test_vphsubwd(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_vphsubwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vphsubwd %xmm0, %xmm0 # sched: [3:1.50] -; GENERIC-NEXT: vphsubwd (%rdi), %xmm0 # sched: [9:1.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vphsubwd: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphsubwd %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vphsubwd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vphsubwd: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vphsubwd %xmm0, %xmm0 -; BDVER3-NEXT: vphsubwd (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vphsubwd: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vphsubwd %xmm0, %xmm0 -; BDVER4-NEXT: vphsubwd (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vphsubwd $0, $0 \0a\09 vphsubwd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1) - ret void -} - -define void @test_vpmacsdd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) { -; GENERIC-LABEL: test_vpmacsdd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpmacsdd: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; BDVER12-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpmacsdd: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpmacsdd: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpmacsdd $2, $1, $0, $0 \0a\09 vpmacsdd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) - ret void -} - -define void @test_vpmacsdqh(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) { -; GENERIC-LABEL: test_vpmacsdqh: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpmacsdqh: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] -; BDVER12-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpmacsdqh: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpmacsdqh: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpmacsdqh $2, $1, $0, $0 \0a\09 vpmacsdqh $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) - ret void -} - -define void @test_vpmacsdql(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) { -; GENERIC-LABEL: test_vpmacsdql: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpmacsdql: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] -; BDVER12-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpmacsdql: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpmacsdql: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpmacsdql $2, $1, $0, $0 \0a\09 vpmacsdql $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) - ret void -} - -define void @test_vpmacssdd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) { -; GENERIC-LABEL: test_vpmacssdd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpmacssdd: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; BDVER12-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpmacssdd: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpmacssdd: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpmacssdd $2, $1, $0, $0 \0a\09 vpmacssdd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) - ret void -} - -define void @test_vpmacssdqh(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) { -; GENERIC-LABEL: test_vpmacssdqh: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpmacssdqh: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] -; BDVER12-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpmacssdqh: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpmacssdqh: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpmacssdqh $2, $1, $0, $0 \0a\09 vpmacssdqh $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) - ret void -} - -define void @test_vpmacssdql(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) { -; GENERIC-LABEL: test_vpmacssdql: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpmacssdql: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] -; BDVER12-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpmacssdql: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpmacssdql: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpmacssdql $2, $1, $0, $0 \0a\09 vpmacssdql $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) - ret void -} - -define void @test_vpmacsswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) { -; GENERIC-LABEL: test_vpmacsswd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpmacsswd: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER12-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpmacsswd: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpmacsswd: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpmacsswd $2, $1, $0, $0 \0a\09 vpmacsswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) - ret void -} - -define void @test_vpmacssww(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) { -; GENERIC-LABEL: test_vpmacssww: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpmacssww: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER12-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpmacssww: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpmacssww: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpmacssww $2, $1, $0, $0 \0a\09 vpmacssww $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) - ret void -} - -define void @test_vpmacswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) { -; GENERIC-LABEL: test_vpmacswd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpmacswd: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER12-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpmacswd: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpmacswd: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpmacswd $2, $1, $0, $0 \0a\09 vpmacswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) - ret void -} - -define void @test_vpmacsww(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) { -; GENERIC-LABEL: test_vpmacsww: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpmacsww: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER12-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpmacsww: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpmacsww: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpmacsww $2, $1, $0, $0 \0a\09 vpmacsww $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) - ret void -} - -define void @test_vpmadcsswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) { -; GENERIC-LABEL: test_vpmadcsswd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpmadcsswd: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER12-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpmadcsswd: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpmadcsswd: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpmadcsswd $2, $1, $0, $0 \0a\09 vpmadcsswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) - ret void -} - -define void @test_vpmadcswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) { -; GENERIC-LABEL: test_vpmadcswd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpmadcswd: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER12-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpmadcswd: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpmadcswd: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpmadcswd $2, $1, $0, $0 \0a\09 vpmadcswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) - ret void -} - -define void @test_vpperm(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) { -; GENERIC-LABEL: test_vpperm: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpperm: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; BDVER12-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0 # sched: [8:2.00] -; BDVER12-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpperm: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpperm: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpperm $2, $1, $0, $0 \0A\09 vpperm $3, $1, $0, $0 \0A\09 vpperm $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) - ret void -} - -define void @test_vprot(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_vprot: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vprotb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vprotd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vprotq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vprotw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vprotb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vprotd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vprotq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vprotw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vprotb %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vprotd %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vprotq %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vprotw %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vprotb $7, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vprotd $7, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vprotq $7, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vprotw $7, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vprotb $7, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vprotd $7, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vprotq $7, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vprotw $7, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vprot: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vprotb %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER12-NEXT: vprotd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER12-NEXT: vprotq %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER12-NEXT: vprotw %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER12-NEXT: vprotb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vprotd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vprotq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vprotw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vprotb %xmm0, (%rdi), %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vprotd %xmm0, (%rdi), %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vprotq %xmm0, (%rdi), %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vprotw %xmm0, (%rdi), %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vprotb $7, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vprotd $7, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vprotq $7, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vprotw $7, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER12-NEXT: vprotb $7, (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: vprotd $7, (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: vprotq $7, (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: vprotw $7, (%rdi), %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vprot: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vprotb %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vprotd %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vprotq %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vprotw %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vprotb (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vprotd (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vprotq (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vprotw (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vprotb %xmm0, (%rdi), %xmm0 -; BDVER3-NEXT: vprotd %xmm0, (%rdi), %xmm0 -; BDVER3-NEXT: vprotq %xmm0, (%rdi), %xmm0 -; BDVER3-NEXT: vprotw %xmm0, (%rdi), %xmm0 -; BDVER3-NEXT: vprotb $7, %xmm0, %xmm0 -; BDVER3-NEXT: vprotd $7, %xmm0, %xmm0 -; BDVER3-NEXT: vprotq $7, %xmm0, %xmm0 -; BDVER3-NEXT: vprotw $7, %xmm0, %xmm0 -; BDVER3-NEXT: vprotb $7, (%rdi), %xmm0 -; BDVER3-NEXT: vprotd $7, (%rdi), %xmm0 -; BDVER3-NEXT: vprotq $7, (%rdi), %xmm0 -; BDVER3-NEXT: vprotw $7, (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vprot: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vprotb %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vprotd %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vprotq %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vprotw %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vprotb (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vprotd (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vprotq (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vprotw (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vprotb %xmm0, (%rdi), %xmm0 -; BDVER4-NEXT: vprotd %xmm0, (%rdi), %xmm0 -; BDVER4-NEXT: vprotq %xmm0, (%rdi), %xmm0 -; BDVER4-NEXT: vprotw %xmm0, (%rdi), %xmm0 -; BDVER4-NEXT: vprotb $7, %xmm0, %xmm0 -; BDVER4-NEXT: vprotd $7, %xmm0, %xmm0 -; BDVER4-NEXT: vprotq $7, %xmm0, %xmm0 -; BDVER4-NEXT: vprotw $7, %xmm0, %xmm0 -; BDVER4-NEXT: vprotb $7, (%rdi), %xmm0 -; BDVER4-NEXT: vprotd $7, (%rdi), %xmm0 -; BDVER4-NEXT: vprotq $7, (%rdi), %xmm0 -; BDVER4-NEXT: vprotw $7, (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vprotb $1, $0, $0 \0A\09 vprotd $1, $0, $0 \0A\09 vprotq $1, $0, $0 \0A\09 vprotw $1, $0, $0 \0A\09 vprotb $2, $0, $0 \0A\09 vprotd $2, $0, $0 \0A\09 vprotq $2, $0, $0 \0A\09 vprotw $2, $0, $0 \0A\09 vprotb $0, $2, $0 \0A\09 vprotd $0, $2, $0 \0A\09 vprotq $0, $2, $0 \0A\09 vprotw $0, $2, $0 \0A\09 vprotb $3, $0, $0 \0A\09 vprotd $3, $0, $0 \0A\09 vprotq $3, $0, $0 \0A\09 vprotw $3, $0, $0 \0A\09 vprotb $3, $2, $0 \0A\09 vprotd $3, $2, $0 \0A\09 vprotq $3, $2, $0 \0A\09 vprotw $3, $2, $0", "x,x,*m,i"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2, i8 7) - ret void -} - -define void @test_vpsha(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_vpsha: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpshab %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpshad %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpshaq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpshaw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpshab (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpshad (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpshaq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpshaw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpshab %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpshad %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpshaq %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpshaw %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpsha: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpshab %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER12-NEXT: vpshad %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER12-NEXT: vpshaq %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER12-NEXT: vpshaw %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER12-NEXT: vpshab (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vpshad (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vpshaq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vpshaw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vpshab %xmm0, (%rdi), %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vpshad %xmm0, (%rdi), %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vpshaq %xmm0, (%rdi), %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vpshaw %xmm0, (%rdi), %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpsha: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpshab %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpshad %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpshaq %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpshaw %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpshab (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpshad (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpshaq (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpshaw (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpshab %xmm0, (%rdi), %xmm0 -; BDVER3-NEXT: vpshad %xmm0, (%rdi), %xmm0 -; BDVER3-NEXT: vpshaq %xmm0, (%rdi), %xmm0 -; BDVER3-NEXT: vpshaw %xmm0, (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpsha: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpshab %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpshad %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpshaq %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpshaw %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpshab (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpshad (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpshaq (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpshaw (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpshab %xmm0, (%rdi), %xmm0 -; BDVER4-NEXT: vpshad %xmm0, (%rdi), %xmm0 -; BDVER4-NEXT: vpshaq %xmm0, (%rdi), %xmm0 -; BDVER4-NEXT: vpshaw %xmm0, (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpshab $1, $0, $0 \0A\09 vpshad $1, $0, $0 \0A\09 vpshaq $1, $0, $0 \0A\09 vpshaw $1, $0, $0 \0A\09 vpshab $2, $0, $0 \0A\09 vpshad $2, $0, $0 \0A\09 vpshaq $2, $0, $0 \0A\09 vpshaw $2, $0, $0 \0A\09 vpshab $0, $2, $0 \0A\09 vpshad $0, $2, $0 \0A\09 vpshaq $0, $2, $0 \0A\09 vpshaw $0, $2, $0", "x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) - ret void -} - -define void @test_vpshl(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_vpshl: -; GENERIC: # %bb.0: -; GENERIC-NEXT: #APP -; GENERIC-NEXT: vpshlb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpshld %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpshlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpshlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpshlb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpshld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpshlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpshlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpshlb %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpshld %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpshlq %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: vpshlw %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: #NO_APP -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; BDVER12-LABEL: test_vpshl: -; BDVER12: # %bb.0: -; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpshlb %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER12-NEXT: vpshld %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER12-NEXT: vpshlq %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER12-NEXT: vpshlw %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER12-NEXT: vpshlb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vpshld (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vpshlq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vpshlw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vpshlb %xmm0, (%rdi), %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vpshld %xmm0, (%rdi), %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vpshlq %xmm0, (%rdi), %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: vpshlw %xmm0, (%rdi), %xmm0 # sched: [8:0.50] -; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [5:1.00] -; -; BDVER3-LABEL: test_vpshl: -; BDVER3: # %bb.0: -; BDVER3-NEXT: #APP -; BDVER3-NEXT: vpshlb %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpshld %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpshlq %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpshlw %xmm1, %xmm0, %xmm0 -; BDVER3-NEXT: vpshlb (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpshld (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpshlq (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpshlw (%rdi), %xmm0, %xmm0 -; BDVER3-NEXT: vpshlb %xmm0, (%rdi), %xmm0 -; BDVER3-NEXT: vpshld %xmm0, (%rdi), %xmm0 -; BDVER3-NEXT: vpshlq %xmm0, (%rdi), %xmm0 -; BDVER3-NEXT: vpshlw %xmm0, (%rdi), %xmm0 -; BDVER3-NEXT: #NO_APP -; BDVER3-NEXT: retq -; -; BDVER4-LABEL: test_vpshl: -; BDVER4: # %bb.0: -; BDVER4-NEXT: #APP -; BDVER4-NEXT: vpshlb %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpshld %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpshlq %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpshlw %xmm1, %xmm0, %xmm0 -; BDVER4-NEXT: vpshlb (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpshld (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpshlq (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpshlw (%rdi), %xmm0, %xmm0 -; BDVER4-NEXT: vpshlb %xmm0, (%rdi), %xmm0 -; BDVER4-NEXT: vpshld %xmm0, (%rdi), %xmm0 -; BDVER4-NEXT: vpshlq %xmm0, (%rdi), %xmm0 -; BDVER4-NEXT: vpshlw %xmm0, (%rdi), %xmm0 -; BDVER4-NEXT: #NO_APP -; BDVER4-NEXT: retq - call void asm sideeffect "vpshlb $1, $0, $0 \0A\09 vpshld $1, $0, $0 \0A\09 vpshlq $1, $0, $0 \0A\09 vpshlw $1, $0, $0 \0A\09 vpshlb $2, $0, $0 \0A\09 vpshld $2, $0, $0 \0A\09 vpshlq $2, $0, $0 \0A\09 vpshlw $2, $0, $0 \0A\09 vpshlb $0, $2, $0 \0A\09 vpshld $0, $2, $0 \0A\09 vpshlq $0, $2, $0 \0A\09 vpshlw $0, $2, $0", "x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) - ret void -} Index: tools/llvm-exegesis/llvm-exegesis.cpp =================================================================== --- tools/llvm-exegesis/llvm-exegesis.cpp +++ tools/llvm-exegesis/llvm-exegesis.cpp @@ -188,8 +188,7 @@ // Implementation of the llvm::MCStreamer interface. We only care about // instructions. void EmitInstruction(const llvm::MCInst &Instruction, - const llvm::MCSubtargetInfo &STI, - bool PrintSchedInfo) override { + const llvm::MCSubtargetInfo &STI) override { Result->Instructions.push_back(Instruction); } Index: tools/llvm-mca/CodeRegionGenerator.cpp =================================================================== --- tools/llvm-mca/CodeRegionGenerator.cpp +++ tools/llvm-mca/CodeRegionGenerator.cpp @@ -48,8 +48,7 @@ // We only want to intercept the emission of new instructions. virtual void EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo & /* unused */, - bool /* unused */) override { + const MCSubtargetInfo &/* unused */) override { Regions.addInstruction(Inst); }